Add CourtListener integration, liquid glass redesign, UI improvements, version control, various fixes

This commit is contained in:
willchen96 2026-06-06 15:48:47 +08:00
parent d39f5806e5
commit 44e868eb42
106 changed files with 16350 additions and 7753 deletions

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -9,6 +9,8 @@ interface DocRow {
}
interface VersionPathRow extends DocRow {
/** API/client alias for document_versions.filename of the active version. */
filename?: string | null;
/** Set from document_versions.storage_path of the active version. */
storage_path?: string | null;
/** Set from document_versions.pdf_storage_path of the active version. */
@ -16,6 +18,10 @@ interface VersionPathRow extends DocRow {
current_version_id?: string | null;
/** Set from document_versions.version_number of the active version. */
active_version_number?: number | null;
/** Active-version file metadata. */
file_type?: string | null;
size_bytes?: number | null;
page_count?: number | null;
}
export interface ActiveVersion {
@ -23,8 +29,11 @@ export interface ActiveVersion {
storage_path: string;
pdf_storage_path: string | null;
version_number: number | null;
display_name: string | null;
filename: string | null;
source: string | null;
file_type: string | null;
size_bytes: number | null;
page_count: number | null;
}
/**
@ -54,7 +63,7 @@ export async function loadActiveVersion(
const { data: v } = await db
.from("document_versions")
.select(
"id, document_id, storage_path, pdf_storage_path, version_number, display_name, source",
"id, document_id, storage_path, pdf_storage_path, version_number, filename, source, file_type, size_bytes, page_count",
)
.eq("id", targetVersionId)
.single();
@ -64,8 +73,11 @@ export async function loadActiveVersion(
storage_path: v.storage_path as string,
pdf_storage_path: (v.pdf_storage_path as string | null) ?? null,
version_number: (v.version_number as number | null) ?? null,
display_name: (v.display_name as string | null) ?? null,
filename: (v.filename as string | null) ?? null,
source: (v.source as string | null) ?? null,
file_type: (v.file_type as string | null) ?? null,
size_bytes: (v.size_bytes as number | null) ?? null,
page_count: (v.page_count as number | null) ?? null,
};
}
@ -85,14 +97,20 @@ export async function attachActiveVersionPaths<T extends VersionPathRow>(
.filter((id): id is string => typeof id === "string");
if (versionIds.length === 0) {
for (const d of docs) {
d.filename = "Untitled document";
d.storage_path = null;
d.pdf_storage_path = null;
d.file_type = null;
d.size_bytes = null;
d.page_count = null;
}
return docs;
}
const { data: rows } = await db
.from("document_versions")
.select("id, storage_path, pdf_storage_path, version_number")
.select(
"id, storage_path, pdf_storage_path, version_number, filename, file_type, size_bytes, page_count",
)
.in("id", versionIds);
const byId = new Map<
string,
@ -100,6 +118,10 @@ export async function attachActiveVersionPaths<T extends VersionPathRow>(
storage_path: string | null;
pdf_storage_path: string | null;
version_number: number | null;
filename: string | null;
file_type: string | null;
size_bytes: number | null;
page_count: number | null;
}
>();
for (const r of (rows ?? []) as {
@ -107,11 +129,19 @@ export async function attachActiveVersionPaths<T extends VersionPathRow>(
storage_path: string | null;
pdf_storage_path: string | null;
version_number: number | null;
filename: string | null;
file_type: string | null;
size_bytes: number | null;
page_count: number | null;
}[]) {
byId.set(r.id, {
storage_path: r.storage_path ?? null,
pdf_storage_path: r.pdf_storage_path ?? null,
version_number: r.version_number ?? null,
filename: r.filename ?? null,
file_type: r.file_type ?? null,
size_bytes: r.size_bytes ?? null,
page_count: r.page_count ?? null,
});
}
for (const d of docs) {
@ -119,6 +149,10 @@ export async function attachActiveVersionPaths<T extends VersionPathRow>(
d.storage_path = v?.storage_path ?? null;
d.pdf_storage_path = v?.pdf_storage_path ?? null;
d.active_version_number = v?.version_number ?? null;
d.filename = v?.filename?.trim() || "Untitled document";
d.file_type = v?.file_type ?? null;
d.size_bytes = v?.size_bytes ?? null;
d.page_count = v?.page_count ?? null;
}
return docs;
}

View file

@ -0,0 +1,197 @@
/**
 * Discriminated union (tagged by `type`) summarising the outcome of a single
 * CourtListener tool call. Each `type` literal is identical to one of the
 * tool-name strings in COURTLISTENER_TOOL_NAMES, and every variant carries an
 * optional `error` string.
 * NOTE(review): presumably these are streamed to the client for progress UI —
 * confirm against the emitter.
 */
export type CourtlistenerToolEvent =
// Case-law search: the query text and how many results were returned.
| {
type: "courtlistener_search_case_law";
query: string;
result_count: number;
error?: string;
}
// Cluster fetch: requested IDs, counts, and optional per-case metadata.
| {
type: "courtlistener_get_cases";
cluster_ids: number[];
case_count: number;
opinion_count: number;
cases?: {
cluster_id: number;
case_name: string | null;
citation: string | null;
dateFiled?: string | null;
url?: string | null;
}[];
error?: string;
}
// In-case search: top-level fields plus an optional `searches` list whose
// entries repeat the same fields (each with its own optional `error`).
| {
type: "courtlistener_find_in_case";
cluster_id: number | null;
query: string;
total_matches: number;
case_name?: string | null;
citation?: string | null;
searches?: {
cluster_id: number | null;
query: string;
total_matches: number;
case_name?: string | null;
citation?: string | null;
error?: string;
}[];
error?: string;
}
// Opinion read: which cluster was read and how many opinions were involved.
| {
type: "courtlistener_read_case";
cluster_id: number | null;
case_name?: string | null;
citation?: string | null;
opinion_count: number;
error?: string;
}
// Citation verification: number of citations checked and number matched.
| {
type: "courtlistener_verify_citations";
citation_count: number;
match_count: number;
error?: string;
};
/**
 * Event describing one cited case. `url` is the only field that is always a
 * non-null string; `cluster_id`, `case_name` and `citation` may be null, and
 * the remaining fields are optional.
 * NOTE(review): field names mix snake_case (`cluster_id`, `case_name`) with
 * camelCase (`pdfUrl`, `dateFiled`) — confirm consumers expect exactly these keys.
 */
export type CaseCitationEvent = {
type: "case_citation";
cluster_id: number | null;
case_name: string | null;
citation: string | null;
url: string;
pdfUrl?: string | null;
dateFiled?: string | null;
judges?: string | null;
};
/**
 * Canonical tool-name strings for the CourtListener tools. `as const` keeps
 * each value as a string-literal type; the values equal the `type` tags of
 * CourtlistenerToolEvent.
 */
export const COURTLISTENER_TOOL_NAMES = {
searchCaseLaw: "courtlistener_search_case_law",
getCases: "courtlistener_get_cases",
findInCase: "courtlistener_find_in_case",
readCase: "courtlistener_read_case",
verifyCitations: "courtlistener_verify_citations",
} as const;
/**
 * System-prompt fragment telling the model how to use the CourtListener tools:
 * verify citations first, then fetch, then search within a case, then read
 * opinion text only if needed; stop calling CourtListener after a rate-limit
 * error; cite only from supplied opinion text.
 *
 * The prompt spells the tool names out as literal text, so it must be kept in
 * sync by hand with COURTLISTENER_TOOL_NAMES. It also states that keyword/issue
 * search is not available.
 */
export const COURTLISTENER_SYSTEM_PROMPT = `LEGAL RESEARCH QUERIES:
- When a user asks a question on US law, you are required to cite relevant case law in your answer. Always verify US case citations using the courtlistener_verify_citations tool.
- If the user gives case names or reporter citations, use courtlistener_verify_citations for those names/citations.
- CourtListener keyword/issue search is not available. Do not attempt to search CourtListener for new candidate cases by legal issue or keywords. Work only from cases/citations supplied by the user, cases found in the provided documents, or citations already present in the conversation.
- If any CourtListener tool call reports that a CourtListener rate limit was exceeded, or returns a 429/throttled/rate-limit error, do not make any further CourtListener API/search calls in that turn. Do not retry, verify more citations, fetch more cases, or run additional CourtListener searches; answer with the information already available and briefly state that CourtListener is rate limiting requests.
- For cases you may cite or materially rely on, follow this sequence: first use courtlistener_verify_citations for case names/citations, then use courtlistener_get_cases to fetch/cache the relevant case clusters, then use courtlistener_find_in_case to search targeted keywords in the cached opinions, and only if those keyword snippets are insufficient use courtlistener_read_case to read selected opinion text.
- Only cite cases whose underlying opinion text, or at least the specific relevant opinion passages, has been supplied to you in this turn. courtlistener_get_cases only fetches and caches opinions; it does NOT place full opinion text in your context. It returns text-free opinion metadata so you can choose which opinion(s) matter. After courtlistener_get_cases, use courtlistener_find_in_case for targeted keyword or phrase lookup inside that cached case. If those snippets are not enough, use courtlistener_read_case to read only the specific already-fetched opinion(s) you need. courtlistener_find_in_case and courtlistener_read_case require the case to have been fetched first.
- When a fetched case has multiple opinions, do not read all opinions by default. Choose the specific opinion_id or opinion_ids needed from the metadata or search hits. Prefer the lead/majority/controlling opinion when it is sufficient; read concurrences, dissents, or combined opinions only when they are necessary for the user's question.
- When using courtlistener_find_in_case, search for terms that are 1-3 words long and actually likely to appear exactly as written in the opinion text. Do not use long sentence-like phrases. Run courtlistener_find_in_case no more than 3 times in a single assistant turn; if those searches are insufficient, read the smallest needed opinion text with courtlistener_read_case or answer with the available information.
- Do not cite a case based only on memory, search-result snippets, reporter metadata, citationLinks, or verification results. Those sources may help choose candidates, but final case citations must be grounded in supplied opinion text/passages.
- Every case citation in final prose must be rendered as a clickable case-law panel link using the markdown link returned in citationLinks, e.g. [Case Name, Citation](us-case-12345). Do not write plain-text case citations without the link.
- Use numbered [N] markers for case citations in the final prose and include each cited case in the final <CITATIONS> block.
- Each case entry in the <CITATIONS> block must include quote(s) copied exactly from the supplied opinion text/passages for that case, e.g. {"ref": N, "cluster_id": 123, "quotes": [{"opinion_id": 456, "quote": "exact verbatim opinion text"}]}. Do not include top-level "quote", "doc_id", "page", "case_name", or "citation" for case entries.
- If a case is useful but you do not have its opinion text or relevant passages, either fetch the opinions before citing it or say that you could not read the opinion and do not cite or characterize the case beyond basic metadata.`;
/**
 * Function-tool declarations (name, description, JSON-schema `parameters`) for
 * four CourtListener tools: getCases, findInCase, readCase and verifyCitations.
 * COURTLISTENER_TOOL_NAMES.searchCaseLaw has no declaration in this list.
 *
 * NOTE(review): parameter keys mix camelCase (`clusterId`, `opinionIds`) with
 * snake_case (`max_results`, `context_chars`) — confirm the tool handlers read
 * exactly these keys.
 * NOTE(review): the `{ type: "function", function: {...} }` shape looks like
 * the OpenAI-style tool format — confirm how it is converted per provider.
 */
export const COURTLISTENER_TOOLS = [
// Fetch + cache clusters by ID; the description says it returns metadata only.
{
type: "function",
function: {
name: COURTLISTENER_TOOL_NAMES.getCases,
description:
"Fetch and cache one or more CourtListener case clusters and their opinions by cluster ID. This returns metadata/counts only, not full opinion text. After this, call courtlistener_find_in_case for targeted passages or courtlistener_read_case if broader full-case context is needed.",
parameters: {
type: "object",
properties: {
clusterIds: {
type: "array",
items: { type: "integer" },
description:
"CourtListener cluster IDs from courtlistener_verify_citations or other case metadata already present in the conversation.",
},
},
required: ["clusterIds"],
},
},
},
// Keyword lookup inside a cluster that was already fetched.
{
type: "function",
function: {
name: COURTLISTENER_TOOL_NAMES.findInCase,
description:
"Search within an already-fetched CourtListener case cluster for specific keyword(s) or phrases. Returns matches with surrounding opinion context. Call courtlistener_get_cases first; this tool does not fetch cases. Use no more than 3 calls to this tool in a single assistant turn.",
parameters: {
type: "object",
properties: {
clusterId: {
type: "integer",
description:
"CourtListener cluster ID previously fetched with courtlistener_get_cases.",
},
query: {
type: "string",
description:
"Short term to search for, 1-3 words long and likely to appear exactly as written in the opinion text. Matching is case-insensitive and collapses whitespace.",
},
max_results: {
type: "integer",
description:
"Maximum number of matches to return. Default 20.",
},
context_chars: {
type: "integer",
description:
"Characters of surrounding context to include on each side of each match. Default 160.",
},
},
required: ["clusterId", "query"],
},
},
},
// Read opinion text from a fetched cluster; only clusterId is required, the
// opinion selectors are optional.
{
type: "function",
function: {
name: COURTLISTENER_TOOL_NAMES.readCase,
description:
"Read selected opinion text from an already-fetched CourtListener case cluster in this turn's cache. Use after courtlistener_find_in_case if snippets are insufficient. If the case has multiple opinions, pass only the opinionId/opinionIds needed. Call courtlistener_get_cases first; this tool does not fetch cases.",
parameters: {
type: "object",
properties: {
clusterId: {
type: "integer",
description:
"CourtListener cluster ID previously fetched with courtlistener_get_cases.",
},
opinionId: {
type: "integer",
description:
"Specific opinion ID to read. Use when one opinion is enough.",
},
opinionIds: {
type: "array",
items: { type: "integer" },
description:
"Specific opinion IDs to read. Use the smallest set needed; do not read all opinions unless the question requires it.",
},
},
required: ["clusterId"],
},
},
},
// Citation lookup. No `required` list is declared, so the schema itself does
// not force either `text` or `citations` to be present.
{
type: "function",
function: {
name: COURTLISTENER_TOOL_NAMES.verifyCitations,
description:
"Verify legal case citations using CourtListener's citation lookup. Accepts raw text containing citations, or multiple citation strings. This returns citation metadata and clickable case refs; call courtlistener_get_cases only for matched cases that need full opinion text.",
parameters: {
type: "object",
properties: {
text: {
type: "string",
description:
"Raw text containing one or more legal citations. Max 64,000 characters sent to CourtListener.",
},
citations: {
type: "array",
items: { type: "string" },
description:
"Optional list of citation strings. Up to 250 will be joined into the request text field.",
},
},
},
},
},
];

View file

@ -7,6 +7,7 @@ import type {
NormalizedToolResult,
} from "./types";
import { toClaudeTools } from "./tools";
import { logRawLlmStream } from "./rawStreamLog";
type ContentBlock =
| { type: "text"; text: string }
@ -41,6 +42,65 @@ function toNativeMessages(
return messages.map((m) => ({ role: m.role, content: m.content }));
}
/**
 * Turn anything thrown by the Claude client into a single display string.
 *
 * Resolution order:
 *  1. `error` itself is an error-shaped event object -> formatted by
 *     claudeStreamFailureMessage.
 *  2. The error text (Error.message when non-empty, otherwise String(error))
 *     contains a JSON error payload -> formatted from that payload.
 *  3. Otherwise the raw text, prefixed with "Claude error:" — except that an
 *     Error whose message already starts with that prefix is returned as is.
 *
 * @param error - Value caught from a Claude request or stream.
 * @returns A message beginning with "Claude error".
 */
function claudeErrorMessage(error: unknown): string {
  const fromObject = claudeStreamFailureMessage(error);
  if (fromObject) return fromObject;

  let raw: string;
  // Only Error messages are checked for an existing prefix; stringified
  // non-Error values always get the prefix added.
  let alreadyPrefixed = false;
  if (error instanceof Error && error.message) {
    raw = error.message;
    alreadyPrefixed = raw.startsWith("Claude error:");
  } else {
    raw = String(error);
  }

  const fromPayload = parseClaudeErrorPayload(raw);
  if (fromPayload) return fromPayload;
  return alreadyPrefixed ? raw : `Claude error: ${raw}`;
}
/**
 * Look for a JSON object embedded in `value` (from the first "{" to the last
 * "}") and, if it parses, format it with claudeStreamFailureMessage.
 *
 * @param value - Free-form error text that may wrap a JSON payload.
 * @returns The formatted message, or null when there is no brace-delimited
 *   span, the span is not valid JSON, or the JSON is not an error event.
 */
function parseClaudeErrorPayload(value: string): string | null {
  const text = value.trim();
  const open = text.indexOf("{");
  const close = text.lastIndexOf("}");
  // Need an opening brace and a closing brace somewhere after it.
  if (open < 0 || close <= open) return null;

  try {
    const decoded: unknown = JSON.parse(text.slice(open, close + 1));
    return claudeStreamFailureMessage(decoded);
  } catch {
    return null;
  }
}
/**
 * Format an error-shaped event (`{ type: "error", error: { type?, message? } }`)
 * as a display string.
 *
 * @param event - Any value; only objects tagged `type: "error"` with an object
 *   `error` field are recognised.
 * @returns "Claude error (<type>): <message>" when the inner type is a
 *   non-blank string, "Claude error: <message>" otherwise, or null when the
 *   value is not an error event. A missing or blank message becomes
 *   "Claude stream failed.".
 */
function claudeStreamFailureMessage(event: unknown): string | null {
  if (typeof event !== "object" || event === null) return null;
  const { error: detail, type: eventType } = event as Record<string, unknown>;
  if (eventType !== "error") return null;
  if (typeof detail !== "object" || detail === null) return null;

  const { type: rawType, message: rawMessage } = detail as Record<
    string,
    unknown
  >;
  // Blank or non-string fields collapse to "" and are treated as absent.
  const errorType = typeof rawType === "string" ? rawType.trim() : "";
  const trimmedMessage = typeof rawMessage === "string" ? rawMessage.trim() : "";
  const message = trimmedMessage || "Claude stream failed.";

  if (errorType) return `Claude error (${errorType}): ${message}`;
  return `Claude error: ${message}`;
}
/**
 * Build the error thrown when a stream is cancelled: an Error with message
 * "Stream aborted." whose `name` is "AbortError".
 */
function abortError(): Error {
  return Object.assign(new Error("Stream aborted."), { name: "AbortError" });
}
/**
 * Throw the abort error if `signal` has been aborted; do nothing when the
 * signal is missing or still active.
 *
 * @param signal - Optional abort signal to inspect.
 * @throws The Error produced by abortError() when `signal.aborted` is true.
 */
function throwIfAborted(signal?: AbortSignal) {
  if (!signal || !signal.aborted) return;
  throw abortError();
}
export async function streamClaude(
params: StreamChatParams,
): Promise<StreamChatResult> {
@ -61,6 +121,7 @@ export async function streamClaude(
let fullText = "";
for (let iter = 0; iter < maxIter; iter++) {
throwIfAborted(params.abortSignal);
const stream = anthropic.messages.stream({
model,
system: systemPrompt,
@ -82,6 +143,35 @@ export async function streamClaude(
});
let sawThinking = false;
let streamFailureMessage: string | null = null;
const abortStream = () => stream.abort();
params.abortSignal?.addEventListener("abort", abortStream, {
once: true,
});
stream.on("streamEvent", (event) => {
logRawLlmStream({
provider: "claude",
model,
iteration: iter,
label: "streamEvent",
payload: event,
});
const failureMessage = claudeStreamFailureMessage(event);
if (failureMessage) {
streamFailureMessage = failureMessage;
stream.abort();
}
});
stream.on("error", (error) => {
logRawLlmStream({
provider: "claude",
model,
iteration: iter,
label: "error",
payload: error,
});
});
stream.on("text", (delta) => {
callbacks.onContentDelta?.(delta);
@ -93,8 +183,18 @@ export async function streamClaude(
});
}
const final = await stream.finalMessage();
let final: Awaited<ReturnType<typeof stream.finalMessage>>;
try {
final = await stream.finalMessage();
} catch (error) {
if (params.abortSignal?.aborted) throw abortError();
if (streamFailureMessage) throw new Error(streamFailureMessage);
throw new Error(claudeErrorMessage(error));
} finally {
params.abortSignal?.removeEventListener("abort", abortStream);
}
if (sawThinking) callbacks.onReasoningBlockEnd?.();
throwIfAborted(params.abortSignal);
const stopReason = final.stop_reason;
const assistantBlocks = final.content as ContentBlock[];
@ -126,6 +226,7 @@ export async function streamClaude(
}
const results = await runTools(toolCalls);
throwIfAborted(params.abortSignal);
// Record the assistant turn (preserving the original content blocks,
// which Claude requires on the follow-up) and the user turn that
@ -152,12 +253,17 @@ export async function completeClaudeText(params: {
apiKeys?: { claude?: string | null };
}): Promise<string> {
const anthropic = client(params.apiKeys?.claude);
const resp = await anthropic.messages.create({
model: params.model,
max_tokens: params.maxTokens ?? 512,
system: params.systemPrompt,
messages: [{ role: "user", content: params.user }],
});
let resp: Awaited<ReturnType<typeof anthropic.messages.create>>;
try {
resp = await anthropic.messages.create({
model: params.model,
max_tokens: params.maxTokens ?? 512,
system: params.systemPrompt,
messages: [{ role: "user", content: params.user }],
});
} catch (error) {
throw new Error(claudeErrorMessage(error));
}
const text = resp.content
.filter((b): b is Anthropic.TextBlock => b.type === "text")
.map((b) => b.text)

View file

@ -5,6 +5,7 @@ import type {
NormalizedToolCall,
} from "./types";
import { toGeminiTools } from "./tools";
import { logRawLlmStream } from "./rawStreamLog";
type GeminiPart = {
text?: string;
@ -49,6 +50,113 @@ function toNativeContents(messages: StreamChatParams["messages"]): GeminiContent
}));
}
/**
 * Turn anything thrown by the Gemini client into a single display string.
 *
 * Resolution order:
 *  1. `error` itself is a failure-shaped object -> formatted by
 *     geminiStreamFailureMessage.
 *  2. `error` is a string, or an Error with a non-empty message: try to decode
 *     that text as a JSON error payload; failing that, return the text,
 *     adding the "Gemini error:" prefix unless it is already present.
 *  3. Anything else -> "Gemini error: " + String(error).
 *
 * @param error - Value caught from a Gemini request or stream.
 * @returns A message beginning with "Gemini error".
 */
function geminiErrorMessage(error: unknown): string {
  const fromObject = geminiStreamFailureMessage(error);
  if (fromObject) return fromObject;

  // null means "no usable text"; an empty string thrown directly still counts
  // as text, so a falsy check would be wrong here.
  let text: string | null = null;
  if (typeof error === "string") {
    text = error;
  } else if (error instanceof Error && error.message) {
    text = error.message;
  }
  if (text === null) return `Gemini error: ${String(error)}`;

  const fromPayload = parseGeminiErrorPayload(text);
  if (fromPayload) return fromPayload;
  return text.startsWith("Gemini error:") ? text : `Gemini error: ${text}`;
}
/**
 * Finish reasons that are reported as failures rather than normal completion.
 * Hoisted so the set is built once instead of on every chunk.
 */
const GEMINI_ERROR_FINISH_REASONS: ReadonlySet<string> = new Set([
  "SAFETY",
  "RECITATION",
  "BLOCKLIST",
  "PROHIBITED_CONTENT",
  "SPII",
  "MALFORMED_FUNCTION_CALL",
  "OTHER",
]);

/**
 * Look for a JSON object embedded in `value` (from the first "{" to the last
 * "}") and, if it parses, format it with geminiStreamFailureMessage.
 *
 * The payload no longer has to start at the first character: error text that
 * puts a status line in front of the JSON body (for example
 * `got status: 400 Bad Request. {"error":{...}}`) is now decoded too, which
 * matches how the Claude payload parser locates its JSON.
 *
 * @param value - Free-form error text that may wrap a JSON payload.
 * @returns The formatted message, or null when there is no brace-delimited
 *   span, the span is not valid JSON, or the JSON does not describe a failure.
 */
function parseGeminiErrorPayload(value: string): string | null {
  const trimmed = value.trim();
  const jsonStart = trimmed.indexOf("{");
  if (jsonStart < 0) return null;
  const jsonEnd = trimmed.lastIndexOf("}");
  if (jsonEnd <= jsonStart) return null;
  try {
    const parsed = JSON.parse(trimmed.slice(jsonStart, jsonEnd + 1)) as unknown;
    return geminiStreamFailureMessage(parsed);
  } catch {
    return null;
  }
}

/**
 * Inspect a response chunk (or error-like object) and describe the failure it
 * carries, if any. Checked in order:
 *  1. an `error` object — its `message` may itself wrap a JSON payload, which
 *     is decoded first; otherwise message plus `code`/`status` are used;
 *  2. `promptFeedback.blockReason`;
 *  3. the first candidate's `finishReason`, when it is one of the
 *     error finish reasons.
 *
 * @param chunk - Any value; non-objects yield null.
 * @returns "Gemini error (<code>): <message>" / "Gemini error: <message>", or
 *   null when the chunk does not indicate a failure.
 */
function geminiStreamFailureMessage(chunk: unknown): string | null {
  if (!chunk || typeof chunk !== "object") return null;
  const record = chunk as Record<string, unknown>;

  const error = record.error;
  if (error && typeof error === "object") {
    const err = error as Record<string, unknown>;
    // Some messages are themselves a serialized error payload; prefer the
    // inner, more specific description when one can be decoded.
    const nested =
      typeof err.message === "string"
        ? parseGeminiErrorPayload(err.message)
        : null;
    if (nested) return nested;

    const message =
      typeof err.message === "string" && err.message.trim()
        ? err.message.trim()
        : "Gemini stream failed.";
    // Prefer `code` (string or finite number), then fall back to `status`.
    let code: string | null = null;
    if (typeof err.code === "string" && err.code.trim()) {
      code = err.code.trim();
    } else if (typeof err.code === "number" && Number.isFinite(err.code)) {
      code = String(err.code);
    } else if (typeof err.status === "string" && err.status.trim()) {
      code = err.status.trim();
    }
    return code
      ? `Gemini error (${code}): ${message}`
      : `Gemini error: ${message}`;
  }

  const promptFeedback = record.promptFeedback;
  if (promptFeedback && typeof promptFeedback === "object") {
    const feedback = promptFeedback as Record<string, unknown>;
    const blockReason =
      typeof feedback.blockReason === "string" ? feedback.blockReason : null;
    if (blockReason) {
      const detail =
        typeof feedback.blockReasonMessage === "string" &&
        feedback.blockReasonMessage.trim()
          ? feedback.blockReasonMessage.trim()
          : "The Gemini response was blocked.";
      return `Gemini error (${blockReason}): ${detail}`;
    }
  }

  // Only the first candidate is examined.
  const candidates = Array.isArray(record.candidates)
    ? (record.candidates as Record<string, unknown>[])
    : [];
  const firstCandidate = candidates[0];
  const finishReason = firstCandidate?.finishReason;
  if (
    typeof finishReason === "string" &&
    GEMINI_ERROR_FINISH_REASONS.has(finishReason)
  ) {
    return `Gemini error (${finishReason}): The Gemini stream ended with an error finish reason.`;
  }
  return null;
}
/**
 * Create the cancellation error: message "Stream aborted.", name "AbortError".
 */
function abortError(): Error {
  const cancelled = new Error("Stream aborted.");
  cancelled.name = "AbortError";
  return cancelled;
}
/**
 * Guard used between async steps: throws the abort error once `signal` has
 * been aborted, and is a no-op when no signal was supplied.
 *
 * @param signal - Optional abort signal to inspect.
 * @throws The Error produced by abortError() when `signal.aborted` is true.
 */
function throwIfAborted(signal?: AbortSignal) {
  const cancelled = signal?.aborted;
  if (cancelled) {
    throw abortError();
  }
}
export async function streamGemini(
params: StreamChatParams,
): Promise<StreamChatResult> {
@ -61,61 +169,103 @@ export async function streamGemini(
let fullText = "";
for (let iter = 0; iter < maxIter; iter++) {
const stream = await ai.models.generateContentStream({
model,
contents: contents as never,
config: {
systemInstruction: systemPrompt,
tools: functionDeclarations.length
? [{ functionDeclarations } as never]
: undefined,
// When enabled, ask Gemini to surface thought summaries.
// When disabled, explicitly zero the thinking budget so the
// model skips thinking entirely (saves tokens and latency
// for bulk extraction jobs).
thinkingConfig: enableThinking
? { includeThoughts: true }
: { thinkingBudget: 0 },
},
});
throwIfAborted(params.abortSignal);
let stream: AsyncIterable<unknown>;
try {
stream = await ai.models.generateContentStream({
model,
contents: contents as never,
config: {
systemInstruction: systemPrompt,
tools: functionDeclarations.length
? [{ functionDeclarations } as never]
: undefined,
// When enabled, ask Gemini to surface thought summaries.
// When disabled, explicitly zero the thinking budget so the
// model skips thinking entirely (saves tokens and latency
// for bulk extraction jobs).
thinkingConfig: enableThinking
? { includeThoughts: true }
: { thinkingBudget: 0 },
},
});
} catch (error) {
throw new Error(geminiErrorMessage(error));
}
// Per-iteration accumulators.
const textParts: string[] = [];
const callParts: GeminiPart[] = [];
const toolCalls: NormalizedToolCall[] = [];
let sawThinking = false;
const iterator = stream[Symbol.asyncIterator]();
let rejectAbort: ((reason?: unknown) => void) | null = null;
const abortPromise = new Promise<never>((_, reject) => {
rejectAbort = reject;
});
const onAbort = () => rejectAbort?.(abortError());
params.abortSignal?.addEventListener("abort", onAbort, {
once: true,
});
for await (const chunk of stream) {
const parts =
(chunk as { candidates?: { content?: { parts?: GeminiPart[] } }[] })
.candidates?.[0]?.content?.parts ?? [];
try {
while (true) {
throwIfAborted(params.abortSignal);
const { value: chunk, done } = await Promise.race([
iterator.next(),
abortPromise,
]);
if (done) break;
logRawLlmStream({
provider: "gemini",
model,
iteration: iter,
label: "chunk",
payload: chunk,
});
const failureMessage = geminiStreamFailureMessage(chunk);
if (failureMessage) throw new Error(failureMessage);
for (const part of parts) {
if (part.text) {
if (part.thought) {
sawThinking = true;
callbacks.onReasoningDelta?.(part.text);
} else {
textParts.push(part.text);
callbacks.onContentDelta?.(part.text);
const parts =
(chunk as { candidates?: { content?: { parts?: GeminiPart[] } }[] })
.candidates?.[0]?.content?.parts ?? [];
for (const part of parts) {
if (part.text) {
if (part.thought) {
sawThinking = true;
callbacks.onReasoningDelta?.(part.text);
} else {
textParts.push(part.text);
callbacks.onContentDelta?.(part.text);
}
}
if (part.functionCall) {
// Preserve the whole part (including thoughtSignature)
// so it can be echoed verbatim in the replay turn.
callParts.push(part);
const call: NormalizedToolCall = {
id: part.functionCall.id ?? `${part.functionCall.name}-${toolCalls.length}`,
name: part.functionCall.name,
input: part.functionCall.args ?? {},
};
callbacks.onToolCallStart?.(call);
toolCalls.push(call);
}
}
if (part.functionCall) {
// Preserve the whole part (including thoughtSignature)
// so it can be echoed verbatim in the replay turn.
callParts.push(part);
const call: NormalizedToolCall = {
id: part.functionCall.id ?? `${part.functionCall.name}-${toolCalls.length}`,
name: part.functionCall.name,
input: part.functionCall.args ?? {},
};
callbacks.onToolCallStart?.(call);
toolCalls.push(call);
}
}
} catch (error) {
if (params.abortSignal?.aborted) throw abortError();
throw new Error(geminiErrorMessage(error));
} finally {
params.abortSignal?.removeEventListener("abort", onAbort);
if (params.abortSignal?.aborted) {
await iterator.return?.();
}
}
if (sawThinking) callbacks.onReasoningBlockEnd?.();
throwIfAborted(params.abortSignal);
fullText += textParts.join("");
@ -124,6 +274,7 @@ export async function streamGemini(
}
const results = await runTools(toolCalls);
throwIfAborted(params.abortSignal);
// Append the model's turn (text + functionCall parts, in that order)
// and the matching functionResponse turn.
@ -159,12 +310,17 @@ export async function completeGeminiText(params: {
apiKeys?: { gemini?: string | null };
}): Promise<string> {
const ai = client(params.apiKeys?.gemini);
const resp = await ai.models.generateContent({
model: params.model,
contents: [{ role: "user", parts: [{ text: params.user }] }],
config: params.systemPrompt
? { systemInstruction: params.systemPrompt }
: undefined,
});
let resp: Awaited<ReturnType<typeof ai.models.generateContent>>;
try {
resp = await ai.models.generateContent({
model: params.model,
contents: [{ role: "user", parts: [{ text: params.user }] }],
config: params.systemPrompt
? { systemInstruction: params.systemPrompt }
: undefined,
});
} catch (error) {
throw new Error(geminiErrorMessage(error));
}
return resp.text ?? "";
}

View file

@ -9,18 +9,18 @@ export const GEMINI_MAIN_MODELS = [
"gemini-3.1-pro-preview",
"gemini-3-flash-preview",
] as const;
export const OPENAI_MAIN_MODELS = ["gpt-5.5", "gpt-5.4-mini"] as const;
export const OPENAI_MAIN_MODELS = ["gpt-5.5", "gpt-5.4"] as const;
// Mid-tier (used for tabular review) — user picks one in account settings.
export const CLAUDE_MID_MODELS = ["claude-sonnet-4-6"] as const;
export const GEMINI_MID_MODELS = ["gemini-3-flash-preview"] as const;
export const OPENAI_MID_MODELS = ["gpt-5.4-mini"] as const;
export const OPENAI_MID_MODELS = ["gpt-5.4"] as const;
// Low-tier (used for title generation, lightweight extractions) — user picks
// one in account settings.
export const CLAUDE_LOW_MODELS = ["claude-haiku-4-5"] as const;
export const GEMINI_LOW_MODELS = ["gemini-3.1-flash-lite-preview"] as const;
export const OPENAI_LOW_MODELS = ["gpt-5.4-nano"] as const;
export const OPENAI_LOW_MODELS = ["gpt-5.4-lite"] as const;
export const DEFAULT_MAIN_MODEL = "gemini-3-flash-preview";
export const DEFAULT_TITLE_MODEL = "gemini-3.1-flash-lite-preview";

View file

@ -6,6 +6,7 @@ import type {
StreamChatParams,
StreamChatResult,
} from "./types";
import { logRawLlmStream } from "./rawStreamLog";
const OPENAI_RESPONSES_URL = "https://api.openai.com/v1/responses";
const MAX_OUTPUT_TOKENS = 16384;
@ -31,7 +32,13 @@ type ResponseFunctionCallItem = {
type ResponseStreamEvent = {
type?: string;
delta?: string;
response?: { id?: string; output_text?: string };
response?: {
id?: string;
output_text?: string;
status?: string;
error?: { code?: string; message?: string } | null;
};
error?: { code?: string; message?: string } | null;
item?: ResponseFunctionCallItem;
};
@ -104,6 +111,35 @@ function parseFunctionCall(item: ResponseFunctionCallItem): NormalizedToolCall {
};
}
/**
 * Describe the failure carried by a Responses-API stream event, if any.
 *
 * An event counts as failed when:
 *  - its type is "response.failed", or its `response.status` is "failed";
 *  - it carries an error object (`response.error` or `error`);
 *  - its type is "error" — the stream-level error event, whose `code` and
 *    `message` sit directly on the event rather than under an `error` key.
 *    This case was previously not detected at all.
 *
 * Every returned message now starts with "OpenAI error", with or without a
 * code, matching the code branch and the other providers' error formatters.
 *
 * @param event - One decoded SSE event.
 * @returns "OpenAI error (<code>): <message>" / "OpenAI error: <message>", or
 *   null when the event does not indicate a failure.
 */
function openAIStreamFailureMessage(event: ResponseStreamEvent): string | null {
  // Non-blank string -> trimmed string; anything else -> null.
  const text = (value: unknown): string | null =>
    typeof value === "string" && value.trim() ? value.trim() : null;

  const isErrorEvent = event.type === "error";
  const nested = event.response?.error ?? event.error ?? null;
  const failed =
    isErrorEvent ||
    event.type === "response.failed" ||
    event.response?.status === "failed" ||
    !!nested;
  if (!failed) return null;

  // Top-level code/message are only meaningful on the "error" event itself,
  // so they are ignored for every other event type.
  const topLevel = event as { code?: unknown; message?: unknown };
  const message =
    text(nested?.message) ??
    (isErrorEvent ? text(topLevel.message) : null) ??
    "OpenAI response failed.";
  const code =
    text(nested?.code) ?? (isErrorEvent ? text(topLevel.code) : null);

  return code
    ? `OpenAI error (${code}): ${message}`
    : `OpenAI error: ${message}`;
}
/**
 * Produce the error used to signal cancellation. The message is
 * "Stream aborted." and the `name` is "AbortError".
 */
function abortError(): Error {
  const failure: Error = new Error("Stream aborted.");
  failure.name = "AbortError";
  return failure;
}
/**
 * Throw the abort error when `signal` exists and is already aborted;
 * otherwise return without doing anything.
 *
 * @param signal - Optional abort signal to inspect.
 * @throws The Error produced by abortError() when `signal.aborted` is true.
 */
function throwIfAborted(signal?: AbortSignal) {
  if (signal === undefined || !signal.aborted) return;
  throw abortError();
}
async function createResponse(params: {
model: string;
input: ResponseInputItem[];
@ -114,6 +150,7 @@ async function createResponse(params: {
previousResponseId?: string;
reasoningSummary?: boolean;
apiKey: string;
signal?: AbortSignal;
}): Promise<Response> {
const response = await fetch(OPENAI_RESPONSES_URL, {
method: "POST",
@ -133,6 +170,7 @@ async function createResponse(params: {
? { summary: "auto" }
: undefined,
}),
signal: params.signal,
});
if (!response.ok) {
@ -168,6 +206,7 @@ export async function streamOpenAI(
const hasTools = responseTools.length > 0;
for (let iter = 0; iter < maxIter; iter++) {
throwIfAborted(params.abortSignal);
const response = await createResponse({
model,
instructions: iter === 0 ? systemPrompt : undefined,
@ -177,6 +216,7 @@ export async function streamOpenAI(
previousResponseId,
reasoningSummary: !!enableThinking,
apiKey: key,
signal: params.abortSignal,
});
if (!response.body) throw new Error("OpenAI response had no body");
@ -189,14 +229,36 @@ export async function streamOpenAI(
let sawReasoning = false;
while (true) {
throwIfAborted(params.abortSignal);
const { done, value } = await reader.read();
if (done) break;
buffer += decoder.decode(value, { stream: true });
const decoded = decoder.decode(value, { stream: true });
logRawLlmStream({
provider: "openai",
model,
iteration: iter,
label: "sse_chunk",
payload: decoded,
});
buffer += decoded;
const extracted = extractSseJson(buffer);
buffer = extracted.rest;
for (const event of extracted.events as ResponseStreamEvent[]) {
logRawLlmStream({
provider: "openai",
model,
iteration: iter,
label: "sse_event",
payload: event,
});
const failureMessage = openAIStreamFailureMessage(event);
if (failureMessage) {
throw new Error(failureMessage);
}
if (event.response?.id) {
previousResponseId = event.response.id;
}
@ -244,6 +306,7 @@ export async function streamOpenAI(
}
if (sawReasoning) callbacks.onReasoningBlockEnd?.();
throwIfAborted(params.abortSignal);
if (!toolCalls.length || !runTools) {
if (pendingText) {
@ -254,6 +317,7 @@ export async function streamOpenAI(
}
const results = await runTools(toolCalls);
throwIfAborted(params.abortSignal);
input = results.map((result) => ({
type: "function_call_output",
call_id: result.tool_use_id,

View file

@ -0,0 +1,19 @@
/**
 * Dump one raw provider stream payload to the console for debugging.
 *
 * Logging is on whenever NODE_ENV is not "production"; in production it is
 * on only when LOG_RAW_LLM_STREAM is exactly "true". Output is a tag line via
 * console.log followed by the payload via console.dir with unlimited depth
 * and array length.
 *
 * @param args.provider  - Provider label used in the tag.
 * @param args.model     - Model identifier used in the tag.
 * @param args.iteration - Loop iteration number used in the tag.
 * @param args.label     - Short description printed after the tag.
 * @param args.payload   - Value to print in full.
 */
export function logRawLlmStream(args: {
  provider: string;
  model: string;
  iteration: number;
  label: string;
  payload: unknown;
}) {
  const { provider, model, iteration, label, payload } = args;
  const inProduction = process.env.NODE_ENV === "production";
  const explicitlyEnabled = process.env.LOG_RAW_LLM_STREAM === "true";

  if (!inProduction || explicitlyEnabled) {
    const tag = `[raw-llm-stream:${provider}:${model}:iter-${iteration}]`;
    console.log(`${tag} ${label}`);
    console.dir(payload, { depth: null, maxArrayLength: null });
  }
}

View file

@ -40,6 +40,8 @@ export type UserApiKeys = {
claude?: string | null;
gemini?: string | null;
openai?: string | null;
openrouter?: string | null;
courtlistener?: string | null;
};
export type StreamChatParams = {
@ -58,6 +60,7 @@ export type StreamChatParams = {
* one-shot completions should leave this off to save tokens and latency.
*/
enableThinking?: boolean;
abortSignal?: AbortSignal;
};
export type StreamChatResult = {

View file

@ -12,11 +12,14 @@
import {
S3Client,
PutObjectCommand,
GetObjectCommand,
DeleteObjectCommand,
ListObjectsV2Command,
} from "@aws-sdk/client-s3";
import * as S3Commands from "@aws-sdk/client-s3";
import { getSignedUrl as awsGetSignedUrl } from "@aws-sdk/s3-request-presigner";
const GetObjectCommand = (S3Commands as any).GetObjectCommand;
let cachedClient: S3Client | undefined;
function getClient(): S3Client {
@ -79,9 +82,9 @@ export async function downloadFile(key: string): Promise<ArrayBuffer | null> {
if (!storageEnabled) return null;
try {
const client = getClient();
const response = await client.send(
const response = (await client.send(
new GetObjectCommand({ Bucket: BUCKET, Key: key }),
);
)) as any;
if (!response.Body) return null;
const bytes = await response.Body.transformToByteArray();
return bytes.buffer as ArrayBuffer;
@ -90,6 +93,27 @@ export async function downloadFile(key: string): Promise<ArrayBuffer | null> {
}
}
/**
 * Lists every object key in the bucket that starts with `prefix`.
 *
 * Returns an empty array without contacting the store when storage is
 * disabled. Otherwise it issues `ListObjectsV2` requests, following the
 * continuation token from each response until none is returned, and
 * accumulates the keys in the order the pages arrive. Entries without a key
 * are ignored. Errors thrown by the client are not caught here.
 *
 * @param prefix - Key prefix passed as the listing filter.
 * @returns All matching object keys.
 */
export async function listFiles(prefix: string): Promise<string[]> {
  if (!storageEnabled) {
    return [];
  }

  const s3 = getClient();
  const collected: string[] = [];
  let nextToken: string | undefined;

  for (;;) {
    const page = await s3.send(
      new ListObjectsV2Command({
        Bucket: BUCKET,
        Prefix: prefix,
        ContinuationToken: nextToken,
      }),
    );

    const entries = page.Contents ?? [];
    for (const { Key } of entries) {
      if (Key) {
        collected.push(Key);
      }
    }

    // A missing (or empty) token means this was the final page.
    nextToken = page.NextContinuationToken;
    if (!nextToken) {
      break;
    }
  }

  return collected;
}
// ---------------------------------------------------------------------------
// Delete
// ---------------------------------------------------------------------------
@ -123,7 +147,7 @@ export async function getSignedUrl(
Bucket: BUCKET,
Key: key,
ResponseContentDisposition: responseContentDisposition,
});
}) as any;
return await awsGetSignedUrl(client, command, { expiresIn });
} catch {
return null;

View file

@ -3,7 +3,12 @@ import { createServerSupabase } from "./supabase";
import type { UserApiKeys } from "./llm";
type Db = ReturnType<typeof createServerSupabase>;
export type ApiKeyProvider = "claude" | "gemini" | "openai";
export type ApiKeyProvider =
| "claude"
| "gemini"
| "openai"
| "openrouter"
| "courtlistener";
export type ApiKeySource = "user" | "env" | null;
export type ApiKeyStatus = Record<ApiKeyProvider, boolean> & {
sources: Record<ApiKeyProvider, ApiKeySource>;
@ -16,7 +21,13 @@ type EncryptedKeyRow = {
auth_tag: string;
};
const PROVIDERS: ApiKeyProvider[] = ["claude", "gemini", "openai"];
const PROVIDERS: ApiKeyProvider[] = [
"claude",
"gemini",
"openai",
"openrouter",
"courtlistener",
];
function envApiKey(provider: ApiKeyProvider): string | null {
if (provider === "claude") {
@ -29,6 +40,12 @@ function envApiKey(provider: ApiKeyProvider): string | null {
if (provider === "openai") {
return process.env.OPENAI_API_KEY?.trim() || null;
}
if (provider === "openrouter") {
return process.env.OPENROUTER_API_KEY?.trim() || null;
}
if (provider === "courtlistener") {
return process.env.COURTLISTENER_API_TOKEN?.trim() || null;
}
return process.env.GEMINI_API_KEY?.trim() || null;
}
@ -96,10 +113,14 @@ export async function getUserApiKeyStatus(
claude: false,
gemini: false,
openai: false,
openrouter: false,
courtlistener: false,
sources: {
claude: null,
gemini: null,
openai: null,
openrouter: null,
courtlistener: null,
},
};
@ -135,6 +156,8 @@ export async function getUserApiKeys(
claude: envApiKey("claude"),
gemini: envApiKey("gemini"),
openai: envApiKey("openai"),
openrouter: envApiKey("openrouter"),
courtlistener: envApiKey("courtlistener"),
};
const { data, error } = await db

View file

@ -16,7 +16,7 @@ export type UserModelSettings = {
// Title generation is a lightweight task — always routed to the cheapest model
// of whichever provider the user has keys for: Gemini Flash Lite if Gemini is
// available, otherwise OpenAI nano, otherwise Claude Haiku. With no user keys
// available, otherwise OpenAI lite, otherwise Claude Haiku. With no user keys
// set, defaults to Gemini (the dev-mode env fallback).
function resolveTitleModel(apiKeys: UserApiKeys): string {
if (apiKeys.gemini?.trim()) return DEFAULT_TITLE_MODEL;
@ -32,13 +32,13 @@ export async function getUserModelSettings(
const client = db ?? createServerSupabase();
const { data } = await client
.from("user_profiles")
.select("tabular_model")
.select("title_model, tabular_model")
.eq("user_id", userId)
.single();
const api_keys = await getStoredUserApiKeys(userId, client);
return {
title_model: resolveTitleModel(api_keys),
title_model: resolveModel(data?.title_model, resolveTitleModel(api_keys)),
tabular_model: resolveModel(data?.tabular_model, DEFAULT_TABULAR_MODEL),
api_keys,
};