Merge upstream/dev into feature/multi-agent

This commit is contained in:
CREDO23 2026-05-05 01:44:46 +02:00
commit 5119915f4f
278 changed files with 34669 additions and 8970 deletions

View file

@ -12,6 +12,10 @@ export interface AgentFilesystemSelection {
local_filesystem_mounts?: AgentFilesystemMountSelection[];
}
/**
 * Caller-supplied gates for `getAgentFilesystemSelection`. When
 * `localFilesystemEnabled` is false the selection falls back to the cloud
 * default even on desktop.
 */
export interface AgentFilesystemSelectionOptions {
  localFilesystemEnabled: boolean;
}
const DEFAULT_SELECTION: AgentFilesystemSelection = {
filesystem_mode: "cloud",
client_platform: "web",
@ -23,10 +27,15 @@ export function getClientPlatform(): ClientPlatform {
}
export async function getAgentFilesystemSelection(
searchSpaceId?: number | null
searchSpaceId?: number | null,
options?: AgentFilesystemSelectionOptions
): Promise<AgentFilesystemSelection> {
const platform = getClientPlatform();
if (platform !== "desktop" || !window.electronAPI?.getAgentFilesystemSettings) {
if (
platform !== "desktop" ||
!options?.localFilesystemEnabled ||
!window.electronAPI?.getAgentFilesystemSettings
) {
return { ...DEFAULT_SELECTION, client_platform: platform };
}
try {

View file

@ -27,6 +27,8 @@ const AgentFeatureFlagsSchema = z.object({
enable_plugin_loader: z.boolean(),
enable_otel: z.boolean(),
enable_desktop_local_filesystem: z.boolean(),
});
export type AgentFeatureFlags = z.infer<typeof AgentFeatureFlagsSchema>;

View file

@ -5,6 +5,7 @@ import {
type DeleteDocumentRequest,
deleteDocumentRequest,
deleteDocumentResponse,
documentTitleRead,
type GetDocumentByChunkRequest,
type GetDocumentChunksRequest,
type GetDocumentRequest,
@ -269,6 +270,17 @@ class DocumentsApiService {
);
};
/**
 * Fetch a document's title projection by its virtual path within a
 * search space.
 */
getDocumentByVirtualPath = async (request: { search_space_id: number; virtual_path: string }) => {
  const query = new URLSearchParams([
    ["search_space_id", String(request.search_space_id)],
    ["virtual_path", request.virtual_path],
  ]);
  return baseApiService.get(
    `/api/v1/documents/by-virtual-path?${query.toString()}`,
    documentTitleRead
  );
};
/**
* Get document type counts
*/

View file

@ -0,0 +1,305 @@
/** Which chat entrypoint was running when the failure occurred. */
export type ChatFlow = "new" | "resume" | "regenerate";
/** Canonical failure buckets the classifier maps every raw error into. */
export type ChatErrorKind =
  | "premium_quota_exhausted"
  | "thread_busy"
  | "send_failed_pre_accept"
  | "auth_expired"
  | "rate_limited"
  | "network_offline"
  | "stream_interrupted"
  | "stream_parse_error"
  | "tool_execution_error"
  | "persist_message_failed"
  | "server_error"
  | "unknown";
/** Where the UI surfaces the error: pinned inline banner, toast, or nowhere. */
export type ChatErrorChannel = "pinned_inline" | "toast" | "silent";
/** Analytics event name emitted for the failure. */
export type ChatTelemetryEvent = "chat_blocked" | "chat_error";
/** Severity attached to the failure for logging/telemetry. */
export type ChatErrorSeverity = "info" | "warn" | "error";
/** Fully classified chat error, ready for rendering and telemetry. */
export interface NormalizedChatError {
  kind: ChatErrorKind;
  channel: ChatErrorChannel;
  severity: ChatErrorSeverity;
  telemetryEvent: ChatTelemetryEvent;
  // True when this is an anticipated product state (quota, busy thread)
  // rather than a defect.
  isExpected: boolean;
  // Short copy shown to the user on the chosen channel.
  userMessage: string;
  // Optional copy persisted as an assistant bubble (premium-quota case).
  assistantMessage?: string;
  // Original extracted message, kept for diagnostics.
  rawMessage?: string;
  // Machine-readable backend/provider code when one was found.
  errorCode?: string;
  // Extra structured context forwarded to telemetry.
  details?: Record<string, unknown>;
}
/** Raw error plus the flow/context it occurred in, as fed to the classifier. */
export interface RawChatErrorInput {
  error: unknown;
  flow: ChatFlow;
  context?: {
    searchSpaceId?: number;
    threadId?: number | null;
  };
}
/**
 * Assistant-bubble copy persisted when the premium credit balance is
 * exhausted; exported so stream handlers store the exact text the
 * classifier pins. ("can't" had lost its apostrophe in the original.)
 */
export const PREMIUM_QUOTA_ASSISTANT_MESSAGE =
  "I can't continue with the current premium model because your premium credit is exhausted. Switch to a free model or top up your credit to continue.";
/**
 * Best-effort extraction of a human-readable message from an unknown error.
 *
 * @param error - Anything thrown or rejected: Error, string, object, etc.
 * @returns A string; never undefined. (`JSON.stringify` returns `undefined`
 *   for `undefined`/functions/symbols despite its typing, so that case is
 *   coerced to the fallback instead of leaking through the `string` return
 *   type.)
 */
function getErrorMessage(error: unknown): string {
  if (error instanceof Error) return error.message;
  if (typeof error === "string") return error;
  try {
    return JSON.stringify(error) ?? "Unknown error";
  } catch {
    // Circular structures and similar stringify failures land here.
    return "Unknown error";
  }
}
/**
 * Pull a machine-readable error code off an error value, checking in order:
 * `errorCode`/`code` on Error instances, `errorCode` on plain objects, then
 * a top-level `errorCode` in the parsed JSON body.
 */
function getErrorCode(
  error: unknown,
  parsedJson: Record<string, unknown> | null
): string | undefined {
  if (error instanceof Error) {
    const tagged = error as Error & { errorCode?: string; code?: string };
    const fromError = tagged.errorCode || tagged.code;
    if (fromError) return fromError;
  }
  if (typeof error === "object" && error !== null) {
    const candidate = (error as { errorCode?: unknown }).errorCode;
    if (typeof candidate === "string" && candidate) return candidate;
  }
  const fromJson = parsedJson?.errorCode;
  return typeof fromJson === "string" && fromJson ? fromJson : undefined;
}
/**
 * Try to recover a JSON object embedded in free-form error text. Attempts
 * the whole string first, then the substring starting at the first `{`
 * (covers messages shaped like "HTTP 500: {...}"). Returns null when no
 * candidate parses to a non-null object.
 */
function parseEmbeddedJson(text: string): Record<string, unknown> | null {
  const braceAt = text.indexOf("{");
  const candidates = braceAt >= 0 ? [text, text.slice(braceAt)] : [text];
  for (const candidate of candidates) {
    let parsed: unknown;
    try {
      parsed = JSON.parse(candidate);
    } catch {
      continue; // not JSON — try the next candidate
    }
    if (typeof parsed === "object" && parsed !== null) {
      return parsed as Record<string, unknown>;
    }
  }
  return null;
}
/**
 * Extract a provider error `type` (e.g. "rate_limit_error") from a parsed
 * JSON body, preferring the top-level `type` over a nested `error.type`.
 */
function inferProviderErrorType(parsedJson: Record<string, unknown> | null): string | undefined {
  if (!parsedJson) return undefined;
  for (const holder of [parsedJson, parsedJson.error]) {
    if (typeof holder !== "object" || holder === null) continue;
    const candidate = (holder as Record<string, unknown>).type;
    if (typeof candidate === "string" && candidate) return candidate;
  }
  return undefined;
}
/**
 * Normalize any thrown/rejected chat failure into a single shape the UI and
 * telemetry layers consume: where to surface it (channel), how loud
 * (severity), which analytics event to emit, and the user-facing copy.
 *
 * Classification order matters: user-initiated aborts first, then explicit
 * backend error codes, then provider error types sniffed from embedded
 * JSON, and finally a generic "unknown" bucket.
 *
 * Cleanups vs the original: the `errorCode ?? "LITERAL"` fallbacks inside
 * branches whose guard already proves `errorCode` is set were dead code and
 * are removed (the RATE_LIMITED branch keeps its fallback because it can be
 * entered via the provider error type alone), and the two "couldnt" user
 * messages regain their apostrophes.
 */
export function classifyChatError(input: RawChatErrorInput): NormalizedChatError {
  const { error } = input;
  const rawMessage = getErrorMessage(error);
  const parsedJson = parseEmbeddedJson(rawMessage);
  const errorCode = getErrorCode(error, parsedJson);
  const providerErrorType = inferProviderErrorType(parsedJson);
  const providerTypeNormalized = providerErrorType?.toLowerCase() ?? "";
  const errorName = error instanceof Error ? error.name : undefined;
  // User hit "stop" (fetch abort): expected, render nothing visible.
  if (errorName === "AbortError") {
    return {
      kind: "stream_interrupted",
      channel: "silent",
      severity: "info",
      telemetryEvent: "chat_error",
      isExpected: true,
      userMessage: "Request canceled.",
      rawMessage,
      errorCode,
      details: { flow: input.flow },
    };
  }
  if (errorCode === "PREMIUM_QUOTA_EXHAUSTED") {
    return {
      kind: "premium_quota_exhausted",
      channel: "pinned_inline",
      severity: "info",
      telemetryEvent: "chat_blocked",
      isExpected: true,
      userMessage: "Buy more tokens to continue with this model, or switch to a free model.",
      assistantMessage: PREMIUM_QUOTA_ASSISTANT_MESSAGE,
      rawMessage,
      errorCode,
      details: { flow: input.flow },
    };
  }
  if (errorCode === "TURN_CANCELLING") {
    return {
      kind: "thread_busy",
      channel: "toast",
      severity: "info",
      telemetryEvent: "chat_blocked",
      isExpected: true,
      userMessage: "A previous response is still stopping. Please try again in a moment.",
      rawMessage,
      errorCode,
      details: { flow: input.flow },
    };
  }
  if (errorCode === "THREAD_BUSY") {
    return {
      kind: "thread_busy",
      channel: "toast",
      severity: "warn",
      telemetryEvent: "chat_blocked",
      isExpected: true,
      userMessage:
        "Another response is still finishing for this thread. Please try again in a moment.",
      rawMessage,
      errorCode,
      details: { flow: input.flow },
    };
  }
  if (errorCode === "SEND_FAILED_PRE_ACCEPT") {
    return {
      kind: "send_failed_pre_accept",
      channel: "toast",
      severity: "warn",
      telemetryEvent: "chat_blocked",
      isExpected: true,
      userMessage: "Message not sent. Please retry.",
      rawMessage,
      errorCode,
      details: { flow: input.flow },
    };
  }
  if (errorCode === "AUTH_EXPIRED" || errorCode === "UNAUTHORIZED") {
    return {
      kind: "auth_expired",
      channel: "toast",
      severity: "warn",
      telemetryEvent: "chat_error",
      isExpected: true,
      userMessage: "Your session expired. Please sign in again.",
      rawMessage,
      errorCode,
      details: { flow: input.flow },
    };
  }
  // Reachable without an explicit code (provider-type sniffing), so the
  // fallback here is NOT dead.
  if (errorCode === "RATE_LIMITED" || providerTypeNormalized === "rate_limit_error") {
    return {
      kind: "rate_limited",
      channel: "toast",
      severity: "warn",
      telemetryEvent: "chat_blocked",
      isExpected: true,
      userMessage:
        "This model is temporarily rate-limited. Please try again in a few seconds or switch models.",
      rawMessage,
      errorCode: errorCode ?? "RATE_LIMITED",
      details: { flow: input.flow, providerErrorType },
    };
  }
  if (errorCode === "NETWORK_ERROR") {
    return {
      kind: "network_offline",
      channel: "toast",
      severity: "warn",
      telemetryEvent: "chat_error",
      isExpected: true,
      userMessage: "Connection issue. Please try again.",
      rawMessage,
      errorCode,
      details: { flow: input.flow },
    };
  }
  if (errorCode === "STREAM_PARSE_ERROR") {
    return {
      kind: "stream_parse_error",
      channel: "toast",
      severity: "error",
      telemetryEvent: "chat_error",
      isExpected: false,
      userMessage: "We hit a response formatting issue. Please try again.",
      rawMessage,
      errorCode,
      details: { flow: input.flow },
    };
  }
  if (errorCode === "TOOL_EXECUTION_ERROR") {
    return {
      kind: "tool_execution_error",
      channel: "toast",
      severity: "error",
      telemetryEvent: "chat_error",
      isExpected: false,
      userMessage: "A tool failed while processing your request. Please try again.",
      rawMessage,
      errorCode,
      details: { flow: input.flow },
    };
  }
  if (errorCode === "PERSIST_MESSAGE_FAILED") {
    return {
      kind: "persist_message_failed",
      channel: "toast",
      severity: "error",
      telemetryEvent: "chat_error",
      isExpected: false,
      userMessage: "Response generated, but saving failed. Please retry once.",
      rawMessage,
      errorCode,
      details: { flow: input.flow },
    };
  }
  if (errorCode === "SERVER_ERROR") {
    return {
      kind: "server_error",
      channel: "toast",
      severity: "error",
      telemetryEvent: "chat_error",
      isExpected: false,
      userMessage: "We couldn't complete this response right now. Please try again.",
      rawMessage,
      errorCode,
      details: { flow: input.flow, providerErrorType },
    };
  }
  return {
    kind: "unknown",
    channel: "toast",
    severity: "error",
    telemetryEvent: "chat_error",
    isExpected: false,
    userMessage: "We couldn't complete this response right now. Please try again.",
    rawMessage,
    errorCode,
    details: { flow: input.flow, providerErrorType },
  };
}

View file

@ -0,0 +1,110 @@
/**
 * Convert a non-OK fetch Response into an Error tagged with a backend
 * `errorCode` and, when available, a `retryAfterMs` hint.
 *
 * Precedence everywhere: fields under `detail` (FastAPI convention) win
 * over top-level body fields, which win over HTTP headers / a
 * status-derived default code. Both camelCase and snake_case field names
 * are accepted.
 */
export async function toHttpResponseError(
  response: Response
): Promise<Error & { errorCode?: string; retryAfterMs?: number }> {
  // Narrowing helpers over loosely-typed parsed JSON.
  const pickString = (obj: Record<string, unknown> | null, key: string): string | undefined => {
    const value = obj?.[key];
    return typeof value === "string" ? value : undefined;
  };
  const pickNumber = (obj: Record<string, unknown> | null, key: string): number | undefined => {
    const value = obj?.[key];
    return typeof value === "number" ? value : undefined;
  };

  let body: Record<string, unknown> | null = null;
  try {
    const raw = await response.text();
    if (raw) {
      const parsed: unknown = JSON.parse(raw);
      if (typeof parsed === "object" && parsed !== null) {
        body = parsed as Record<string, unknown>;
      }
    }
  } catch {
    // Unreadable or non-JSON body — fall back to status/header info only.
  }

  const detail = body?.detail;
  const detailObj =
    typeof detail === "object" && detail !== null ? (detail as Record<string, unknown>) : null;

  const fallbackCode =
    response.status === 409
      ? "THREAD_BUSY"
      : response.status === 429
        ? "RATE_LIMITED"
        : response.status === 401 || response.status === 403
          ? "AUTH_EXPIRED"
          : "SERVER_ERROR";
  const errorCode =
    pickString(detailObj, "errorCode") ??
    pickString(detailObj, "error_code") ??
    pickString(body, "errorCode") ??
    pickString(body, "error_code") ??
    fallbackCode;

  // Header-derived retry hint: prefer a millisecond header, else convert
  // the standard seconds-valued Retry-After.
  const headerMs = Number.parseFloat(response.headers.get("retry-after-ms") ?? "");
  const headerSec = Number.parseFloat(response.headers.get("retry-after") ?? "");
  const headerRetryMs = Number.isFinite(headerMs)
    ? Math.max(0, Math.round(headerMs))
    : Number.isFinite(headerSec)
      ? Math.max(0, Math.round(headerSec * 1000))
      : undefined;
  const retryAfterMs =
    pickNumber(detailObj, "retry_after_ms") ??
    pickNumber(detailObj, "retryAfterMs") ??
    pickNumber(body, "retry_after_ms") ??
    pickNumber(body, "retryAfterMs") ??
    headerRetryMs;

  const message =
    pickString(detailObj, "message") ??
    (typeof detail === "string" ? detail : undefined) ??
    pickString(body, "message") ??
    `Backend error: ${response.status}`;
  return Object.assign(new Error(message), { errorCode, retryAfterMs });
}
/**
 * Guarantee that an error raised before the server accepted the stream
 * carries an `errorCode`. Already-classified backend codes pass through
 * unchanged; everything else is tagged `SEND_FAILED_PRE_ACCEPT`. Error
 * instances are mutated in place (callers rethrow the same object);
 * non-Error values are wrapped in a fresh Error.
 */
export function tagPreAcceptSendFailure(error: unknown): unknown {
  if (!(error instanceof Error)) {
    return Object.assign(new Error("Failed to send message before stream acceptance"), {
      errorCode: "SEND_FAILED_PRE_ACCEPT",
    });
  }
  const tagged = error as Error & { errorCode?: string; code?: string };
  const existing = tagged.errorCode ?? tagged.code;
  const passthrough = new Set([
    "PREMIUM_QUOTA_EXHAUSTED",
    "THREAD_BUSY",
    "TURN_CANCELLING",
    "AUTH_EXPIRED",
    "UNAUTHORIZED",
    "RATE_LIMITED",
    "NETWORK_ERROR",
    "STREAM_PARSE_ERROR",
    "TOOL_EXECUTION_ERROR",
    "PERSIST_MESSAGE_FAILED",
    "SERVER_ERROR",
  ]);
  const code = existing && passthrough.has(existing) ? existing : "SEND_FAILED_PRE_ACCEPT";
  return Object.assign(error, { errorCode: code });
}

View file

@ -0,0 +1,54 @@
import type { MentionedDocumentInfo } from "@/atoms/chat/mentioned-documents.atom";
/** One token of a tokenized message: a plain-text run or a document mention, each carrying its start offset in the original string. */
export type MentionSegment =
  | { type: "text"; value: string; start: number }
  | { type: "mention"; doc: MentionedDocumentInfo; start: number };
/**
 * Tokenize a user message into plain-text and `@mention` segments.
 *
 * Pure: no React, no DOM, no side effects — safe to unit-test and reuse.
 * Candidate tokens are tried longest-title-first so a longer title
 * (e.g. `@Project Roadmap`) is never shadowed by a shorter prefix
 * (e.g. `@Project`).
 */
export function parseMentionSegments(
  text: string,
  docs: ReadonlyArray<MentionedDocumentInfo>
): MentionSegment[] {
  if (text.length === 0) return [];
  if (docs.length === 0) return [{ type: "text", value: text, start: 0 }];
  const candidates = docs
    .map((doc) => ({ doc, token: `@${doc.title}` }))
    .sort((a, b) => b.token.length - a.token.length);
  const segments: MentionSegment[] = [];
  let textStart = 0; // start of the pending plain-text run
  let pos = 0;
  const flushText = (end: number): void => {
    if (end > textStart) {
      segments.push({ type: "text", value: text.slice(textStart, end), start: textStart });
    }
  };
  while (pos < text.length) {
    const hit = candidates.find(({ token }) => text.startsWith(token, pos));
    if (hit) {
      flushText(pos);
      segments.push({ type: "mention", doc: hit.doc, start: pos });
      pos += hit.token.length;
      textStart = pos;
    } else {
      pos += 1;
    }
  }
  flushText(text.length);
  return segments;
}

View file

@ -0,0 +1,19 @@
import { FrameBatchedUpdater } from "@/lib/chat/streaming-state";
/**
 * Build the frame-batched flush trio used by the stream loops: the batcher
 * itself, a throttled scheduler, and an immediate flush.
 */
export function createStreamFlushHelpers(flushMessages: () => void): {
  batcher: FrameBatchedUpdater;
  scheduleFlush: () => void;
  forceFlush: () => void;
} {
  const batcher = new FrameBatchedUpdater();
  // Throttled path: mark dirty and let the frame batcher coalesce updates.
  const scheduleFlush = (): void => {
    batcher.schedule(flushMessages);
  };
  // Immediate path: ``batcher.flush()`` alone is a no-op while ``dirty``
  // is false (e.g. a tool starts before any text streamed), so set the
  // dirty bit via ``schedule`` first, then flush synchronously so terminal
  // events render without the throttle delay.
  const forceFlush = (): void => {
    scheduleFlush();
    batcher.flush();
  };
  return { batcher, scheduleFlush, forceFlush };
}

View file

@ -0,0 +1,200 @@
import {
addStepSeparator,
addToolCall,
appendReasoning,
appendText,
appendToolInputDelta,
type ContentPartsState,
endReasoning,
readSSEStream,
type SSEEvent,
type ThinkingStepData,
type ToolUIGate,
updateThinkingSteps,
updateToolCall,
} from "@/lib/chat/streaming-state";
/**
 * Dependencies shared by every stream flow's SSE event loop: mutable
 * content-parts state, flush scheduling, and optional per-flow callbacks.
 */
export type SharedStreamEventContext = {
  // Accumulated content parts plus the toolCallId -> part index map.
  contentPartsState: ContentPartsState;
  // Gate deciding which tool calls render UI ("all" or an allow-set).
  toolsWithUI: ToolUIGate;
  // Live thinking-step data keyed by step id, merged into content parts.
  currentThinkingSteps: Map<string, ThinkingStepData>;
  // Throttled flush for high-frequency events (text/reasoning/input deltas).
  scheduleFlush: () => void;
  // Immediate flush for terminal events (tool start / output available).
  forceFlush: () => void;
  onTokenUsage?: (data: Extract<SSEEvent, { type: "data-token-usage" }>["data"]) => void;
  onTurnStatus?: (data: Extract<SSEEvent, { type: "data-turn-status" }>["data"]) => void;
  onToolOutputAvailable?: (
    event: Extract<SSEEvent, { type: "tool-output-available" }>,
    context: {
      contentPartsState: ContentPartsState;
      toolCallIndices: Map<string, number>;
    }
  ) => void;
};
/**
 * After a tool produces output, flip previously-decided interrupt tool
 * calls to completed so the ApprovalCard can transition from shimmer to
 * done. Mutates matching parts' `result` in place (fresh object per part).
 */
export function markInterruptsCompleted(
  contentParts: Array<{ type: string; result?: unknown }>
): void {
  for (const part of contentParts) {
    if (part.type !== "tool-call") continue;
    const result = part.result;
    if (typeof result !== "object" || result === null) continue;
    const flags = result as Record<string, unknown>;
    const decidedInterrupt = flags.__interrupt__ === true && Boolean(flags.__decided__);
    if (decidedInterrupt && !flags.__completed__) {
      part.result = { ...flags, __completed__: true };
    }
  }
}
/**
 * Whether the streamed content is worth persisting: any non-empty text or
 * reasoning part, or any tool call whose UI is enabled by the gate.
 *
 * @param contentParts - Accumulated streaming content parts.
 * @param toolsWithUI - "all", or the set of tool names that render UI.
 * @returns true when at least one part should be saved. (The explicit
 *   `boolean` return annotation was missing on this exported function.)
 */
export function hasPersistableContent(
  contentParts: ContentPartsState["contentParts"],
  toolsWithUI: ToolUIGate
): boolean {
  return contentParts.some(
    (part) =>
      (part.type === "text" && part.text.length > 0) ||
      (part.type === "reasoning" && part.text.length > 0) ||
      (part.type === "tool-call" && (toolsWithUI === "all" || toolsWithUI.has(part.toolName)))
  );
}
// Convert a terminal SSE "error" event into a throwable Error carrying the
// backend errorCode for downstream classification. Falls back to a generic
// message when the event's errorText is empty.
function toStreamTerminalError(
  event: Extract<SSEEvent, { type: "error" }>
): Error & { errorCode?: string } {
  const error = new Error(event.errorText || "Server error") as Error & { errorCode?: string };
  error.errorCode = event.errorCode;
  return error;
}
/**
 * Apply one parsed SSE event to the shared streaming state.
 *
 * @returns true when the event type was recognized and handled here; false
 *   lets the caller run flow-specific handling.
 * @throws On a terminal `error` event (converted to an Error carrying the
 *   backend errorCode).
 */
export function processSharedStreamEvent(
  parsed: SSEEvent,
  context: SharedStreamEventContext
): boolean {
  const { contentPartsState, toolsWithUI, currentThinkingSteps, scheduleFlush, forceFlush } =
    context;
  const { contentParts, toolCallIndices } = contentPartsState;
  if (parsed.type === "text-delta") {
    appendText(contentPartsState, parsed.delta);
    scheduleFlush();
    return true;
  }
  if (parsed.type === "reasoning-delta") {
    appendReasoning(contentPartsState, parsed.delta);
    scheduleFlush();
    return true;
  }
  if (parsed.type === "reasoning-end") {
    endReasoning(contentPartsState);
    scheduleFlush();
    return true;
  }
  if (parsed.type === "start-step") {
    addStepSeparator(contentPartsState);
    scheduleFlush();
    return true;
  }
  if (parsed.type === "finish-step") {
    // Nothing to render; acknowledged so the caller doesn't re-handle it.
    return true;
  }
  if (parsed.type === "tool-input-start") {
    addToolCall(
      contentPartsState,
      toolsWithUI,
      parsed.toolCallId,
      parsed.toolName,
      {},
      false,
      parsed.langchainToolCallId
    );
    forceFlush();
    return true;
  }
  if (parsed.type === "tool-input-delta") {
    // High-frequency event: deltas can fire dozens of times per call, so
    // coalesce with the throttled scheduleFlush (NOT forceFlush).
    appendToolInputDelta(contentPartsState, parsed.toolCallId, parsed.inputTextDelta);
    scheduleFlush();
    return true;
  }
  if (parsed.type === "tool-input-available") {
    const finalArgsText = JSON.stringify(parsed.input ?? {}, null, 2);
    if (toolCallIndices.has(parsed.toolCallId)) {
      updateToolCall(contentPartsState, parsed.toolCallId, {
        args: parsed.input || {},
        argsText: finalArgsText,
        langchainToolCallId: parsed.langchainToolCallId,
      });
    } else {
      addToolCall(
        contentPartsState,
        toolsWithUI,
        parsed.toolCallId,
        parsed.toolName,
        parsed.input || {},
        false,
        parsed.langchainToolCallId
      );
      // addToolCall doesn't accept argsText today; backfill via
      // updateToolCall so the new card renders pretty-printed JSON.
      updateToolCall(contentPartsState, parsed.toolCallId, { argsText: finalArgsText });
    }
    forceFlush();
    return true;
  }
  if (parsed.type === "tool-output-available") {
    updateToolCall(contentPartsState, parsed.toolCallId, {
      result: parsed.output,
      langchainToolCallId: parsed.langchainToolCallId,
    });
    markInterruptsCompleted(contentParts);
    context.onToolOutputAvailable?.(parsed, { contentPartsState, toolCallIndices });
    forceFlush();
    return true;
  }
  if (parsed.type === "data-thinking-step") {
    const stepData = parsed.data as ThinkingStepData;
    if (stepData?.id) {
      currentThinkingSteps.set(stepData.id, stepData);
      if (updateThinkingSteps(contentPartsState, currentThinkingSteps)) {
        scheduleFlush();
      }
    }
    return true;
  }
  if (parsed.type === "data-token-usage") {
    context.onTokenUsage?.(parsed.data);
    return true;
  }
  if (parsed.type === "data-turn-status") {
    context.onTurnStatus?.(parsed.data);
    return true;
  }
  if (parsed.type === "error") {
    throw toStreamTerminalError(parsed);
  }
  return false;
}
/**
 * Drain a streaming Response, invoking (and awaiting) `onEvent` for every
 * parsed SSE event in arrival order.
 */
export async function consumeSseEvents(
  response: Response,
  onEvent: (event: SSEEvent) => void | Promise<void>
): Promise<void> {
  for await (const event of readSSEStream(response)) {
    await onEvent(event);
  }
}

View file

@ -0,0 +1,59 @@
import type { ThreadMessageLike } from "@assistant-ui/react";
/**
 * Merge the durable `turn_id` returned at persist time into assistant-ui
 * message metadata (`metadata.custom.chatTurnId`) for turn-scoped actions.
 * Returns the input message unchanged when there is nothing to merge or the
 * id is already present; otherwise returns a shallow-copied message.
 */
export function mergeChatTurnIdIntoMessage(
  msg: ThreadMessageLike,
  turnId: string | null | undefined
): ThreadMessageLike {
  if (!turnId) return msg;
  const meta = (msg.metadata ?? {}) as { custom?: Record<string, unknown> };
  const custom = meta.custom ?? {};
  if ((custom as { chatTurnId?: string }).chatTurnId === turnId) return msg;
  return {
    ...msg,
    metadata: { ...meta, custom: { ...custom, chatTurnId: turnId } },
  };
}
/** Read `chat_turn_id` from an SSE data payload; null when absent or invalid. */
export function readStreamedChatTurnId(data: unknown): string | null {
  if (typeof data !== "object" || data === null) return null;
  const turnId = (data as { chat_turn_id?: unknown }).chat_turn_id;
  if (typeof turnId !== "string" || turnId.length === 0) return null;
  return turnId;
}
/**
 * Parse the payload of the `data-user-message-id` /
 * `data-assistant-message-id` SSE events that announce the canonical
 * `new_chat_messages.id` for the current turn. Mirrors
 * {@link readStreamedChatTurnId}.
 *
 * Returns `null` for malformed payloads (missing or non-finite numeric
 * `message_id`) so callers ignore the event rather than overwrite the
 * optimistic id with a bogus value.
 */
export function readStreamedMessageId(
  data: unknown
): { messageId: number; turnId: string | null } | null {
  if (typeof data !== "object" || data === null) return null;
  const { message_id: rawId, turn_id: rawTurn } = data as {
    message_id?: unknown;
    turn_id?: unknown;
  };
  if (typeof rawId !== "number" || !Number.isFinite(rawId)) return null;
  const turnId = typeof rawTurn === "string" && rawTurn.length > 0 ? rawTurn : null;
  return { messageId: rawId, turnId };
}
/**
 * Return a copy of `messages` where the message with id `assistantMsgId`
 * carries `turnId` in its metadata; all other entries pass through as-is.
 */
export function applyTurnIdToAssistantMessageList(
  messages: ThreadMessageLike[],
  assistantMsgId: string,
  turnId: string
): ThreadMessageLike[] {
  return messages.map((message) =>
    message.id === assistantMsgId ? mergeChatTurnIdIntoMessage(message, turnId) : message
  );
}

View file

@ -487,6 +487,37 @@ export type SSEEvent =
type: "data-turn-info";
data: { chat_turn_id: string };
}
| {
/**
* Emitted by ``stream_new_chat`` AFTER ``data-turn-info`` /
* ``data-turn-status`` and BEFORE any LLM streaming events,
* once ``persist_user_turn`` has resolved the canonical
* ``new_chat_messages.id`` for the user-side row of the
* current turn. The frontend renames its optimistic
* ``msg-user-XXX`` placeholder id to ``msg-{message_id}``
* so DB-id-gated UI (comments, edit-from-this-message)
* unlocks immediately. Not emitted by ``stream_resume_chat``
* (resume reuses the original turn's user message).
*/
type: "data-user-message-id";
data: { message_id: number; turn_id: string };
}
| {
/**
* Emitted by ``stream_new_chat`` AND ``stream_resume_chat``
* AFTER ``data-turn-info`` / ``data-turn-status`` and BEFORE
* any LLM streaming events, once ``persist_assistant_shell``
* has resolved the canonical ``new_chat_messages.id`` for
* the assistant-side row of the current turn. The frontend
* renames its optimistic ``msg-assistant-XXX`` placeholder
* id, migrates the local ``tokenUsageStore`` and
* ``pendingInterrupt`` references, and binds the running
* mutable ``assistantMsgId`` closure variable to the
* canonical id for the rest of the stream.
*/
type: "data-assistant-message-id";
data: { message_id: number; turn_id: string };
}
| {
/**
* Best-effort revert pass that ran BEFORE this regeneration.
@ -528,25 +559,40 @@ export type SSEEvent =
}>;
};
}
| {
type: "data-turn-status";
data: {
status: "idle" | "busy" | "cancelling";
retry_after_ms?: number;
retry_after_at?: number;
};
}
| {
type: "data-token-usage";
data: {
usage: Record<
string,
{ prompt_tokens: number; completion_tokens: number; total_tokens: number }
{
prompt_tokens: number;
completion_tokens: number;
total_tokens: number;
cost_micros?: number;
}
>;
prompt_tokens: number;
completion_tokens: number;
total_tokens: number;
cost_micros?: number;
call_details: Array<{
model: string;
prompt_tokens: number;
completion_tokens: number;
total_tokens: number;
cost_micros?: number;
}>;
};
}
| { type: "error"; errorText: string };
| { type: "error"; errorText: string; errorCode?: string };
/**
* Async generator that reads an SSE stream and yields parsed JSON objects.

View file

@ -30,9 +30,20 @@ export interface TokenUsageSummary {
prompt_tokens: number;
completion_tokens: number;
total_tokens: number;
/**
* Total provider USD cost for this assistant turn, in micro-USD
* (1_000_000 = $1.00). Optional because rows persisted before the
* cost-credits migration won't have it.
*/
cost_micros?: number;
model_breakdown?: Record<
string,
{ prompt_tokens: number; completion_tokens: number; total_tokens: number }
{
prompt_tokens: number;
completion_tokens: number;
total_tokens: number;
cost_micros?: number;
}
> | null;
}
@ -133,6 +144,17 @@ export async function getThreadMessages(threadId: number): Promise<ThreadHistory
* via ``data-turn-info``. Persisting it lets later edits locate the
* matching LangGraph checkpoint without HumanMessage scanning. Older
* callers can still omit it for back-compat.
*
* @deprecated Replaced by the SSE-based message ID handshake. The
* streaming generator (`stream_new_chat` / `stream_resume_chat`) now
* persists both the user and assistant rows server-side via
* `persist_user_turn` / `persist_assistant_shell` and emits
* `data-user-message-id` / `data-assistant-message-id` SSE events so
* the UI renames its optimistic IDs in real time. The only remaining
* caller is `persistAssistantErrorMessage` (pre-stream error fallback
for requests the server never accepted — the server has nothing to
* persist in that case). After the legacy route is removed in a
* follow-up PR this function will be deleted entirely.
*/
export async function appendMessage(
threadId: number,

View file

@ -0,0 +1,130 @@
// Pure citation parsing for `[citation:...]` tokens emitted by SurfSense
// agents. No React imports — consumed by both the React renderer
// (markdown surfaces) and the Plate value transform (document viewer).
//
// The same logic previously lived inline in
// `components/assistant-ui/markdown-text.tsx` with module-level mutable
// state. This module exposes a per-call URL map so multiple concurrent
// renderers / SSR contexts can't race each other.
import { FENCED_OR_INLINE_CODE } from "@/lib/markdown/code-regions";
/**
 * Matches `[citation:...]` tokens with numeric IDs (including negative and
 * `doc-`-prefixed IDs, optionally comma-separated), URL-based IDs from live
 * web search, or `urlciteN` placeholders produced by
 * `preprocessCitationMarkdown`.
 *
 * Also tolerates fullwidth (Chinese) brackets and zero-width spaces that
 * LLMs sometimes emit around the token.
 */
export const CITATION_REGEX =
  /[[【]\u200B?citation:\s*(https?:\/\/[^\]】\u200B]+|urlcite\d+|(?:doc-)?-?\d+(?:\s*,\s*(?:doc-)?-?\d+)*)\s*\u200B?[\]】]/g;
/** A single parsed citation reference: a raw URL or a chunk/document id. */
export type CitationToken =
  | { kind: "url"; url: string }
  | { kind: "chunk"; chunkId: number; isDocsChunk: boolean };
/** Output element of `parseTextWithCitations` — plain text or a citation token. */
export type ParsedSegment = string | CitationToken;
/** Per-call URL placeholder map; key is `urlciteN`, value is the original URL. */
export type CitationUrlMap = Map<string, string>;
/** Result of preprocessing raw markdown for downstream parsing. */
export interface PreprocessedCitations {
  /** Markdown with `[citation:URL]` tokens rewritten to `[citation:urlciteN]`. */
  content: string;
  /** Lookup table to recover the original URL from each placeholder. */
  urlMap: CitationUrlMap;
}
/** Matches only URL-form citations; applied during preprocessing. */
const URL_CITATION_REGEX = /[[【]\u200B?citation:\s*(https?:\/\/[^\]】\u200B]+)\s*\u200B?[\]】]/g;
/**
 * Rewrite `[citation:URL]` tokens to `[citation:urlciteN]` placeholders so
 * GFM autolinking can't split the URL out of the brackets during markdown
 * parsing. Returns the rewritten content plus a placeholder → URL lookup.
 *
 * Code-fence aware: fenced and inline code regions pass through verbatim so
 * citation-shaped strings in example code stay literal. Known limitations:
 * `~~~` fences, 4-space indented code, and LaTeX math blocks are not
 * skipped; citation tokens inside those regions are rare in practice.
 */
export function preprocessCitationMarkdown(content: string): PreprocessedCitations {
  const urlMap: CitationUrlMap = new Map();
  let nextId = 0;
  const rewrite = (chunk: string): string =>
    chunk.replace(URL_CITATION_REGEX, (_full, url: string) => {
      const placeholder = `urlcite${nextId}`;
      nextId += 1;
      urlMap.set(placeholder, url.trim());
      return `[citation:${placeholder}]`;
    });
  // Splitting on a single-capture-group regex yields code regions at odd
  // indexes and the surrounding text at even indexes; only even-indexed
  // parts are transformed.
  const rebuilt = content
    .split(FENCED_OR_INLINE_CODE)
    .map((part, index) => (index % 2 === 0 ? rewrite(part) : part))
    .join("");
  return { content: rebuilt, urlMap };
}
/**
 * Split a string into plain text segments and parsed citation tokens.
 *
 * Pure data — no React; the renderer module maps tokens to JSX. Negative
 * chunk IDs are forwarded untouched so the consumer decides how to render
 * anonymous documents. `urlciteN` placeholders resolve through `urlMap`;
 * unresolved placeholders are dropped.
 */
export function parseTextWithCitations(text: string, urlMap: CitationUrlMap): ParsedSegment[] {
  const segments: ParsedSegment[] = [];
  let cursor = 0;
  CITATION_REGEX.lastIndex = 0; // shared global regex — always rewind
  for (let m = CITATION_REGEX.exec(text); m !== null; m = CITATION_REGEX.exec(text)) {
    if (m.index > cursor) {
      segments.push(text.slice(cursor, m.index));
    }
    const body = m[1];
    if (body.startsWith("http://") || body.startsWith("https://")) {
      segments.push({ kind: "url", url: body.trim() });
    } else if (body.startsWith("urlcite")) {
      const resolved = urlMap.get(body);
      if (resolved) {
        segments.push({ kind: "url", url: resolved });
      }
    } else {
      for (const rawId of body.split(",")) {
        const trimmed = rawId.trim();
        const isDocsChunk = trimmed.startsWith("doc-");
        const chunkId = Number.parseInt(isDocsChunk ? trimmed.slice(4) : trimmed, 10);
        if (!Number.isNaN(chunkId)) {
          segments.push({ kind: "chunk", chunkId, isDocsChunk });
        }
      }
    }
    cursor = m.index + m[0].length;
  }
  if (cursor < text.length) {
    segments.push(text.slice(cursor));
  }
  return segments.length > 0 ? segments : [text];
}
/** Type guard selecting the citation branch of `ParsedSegment`. */
export function isCitationToken(segment: ParsedSegment): segment is CitationToken {
  const isPlainText = typeof segment === "string";
  return !isPlainText;
}

View file

@ -0,0 +1,8 @@
// Shared pattern matching fenced (```...```) and inline (`...`) code
// regions in markdown. Single source of truth for MDX escaping and
// citation preprocessing so the two transforms never drift apart.
//
// Because the pattern has exactly one capture group, String.split() with it
// places non-code parts at even indexes and the matched code regions at odd
// indexes — preserve odd-indexed segments verbatim when transforming
// markdown.
export const FENCED_OR_INLINE_CODE = /(```[\s\S]*?```|`[^`\n]+`)/g;

View file

@ -1,5 +1,6 @@
import posthog from "posthog-js";
import { getConnectorTelemetryMeta } from "@/components/assistant-ui/connector-popup/constants/connector-constants";
import type { ChatErrorKind, ChatErrorSeverity, ChatFlow } from "@/lib/chat/chat-error-classifier";
/**
* PostHog Analytics Event Definitions
@ -139,6 +140,55 @@ export function trackChatError(searchSpaceId: number, chatId: number, error?: st
});
}
/**
 * Shared property payload for the `chat_blocked` / `chat_error` PostHog
 * events. Field names are snake_case to match the analytics schema.
 */
export interface ChatFailureTelemetry {
  flow: ChatFlow;
  kind: ChatErrorKind;
  error_code?: string;
  severity: ChatErrorSeverity;
  is_expected: boolean;
  message?: string;
}
/**
 * Record that a chat send was blocked before/instead of streaming (quota,
 * busy thread, etc.). `chatId` is null when no thread exists yet.
 */
export function trackChatBlocked(
  searchSpaceId: number,
  chatId: number | null,
  payload: ChatFailureTelemetry
) {
  const properties = compact({
    search_space_id: searchSpaceId,
    chat_id: chatId ?? undefined,
    flow: payload.flow,
    kind: payload.kind,
    error_code: payload.error_code,
    severity: payload.severity,
    is_expected: payload.is_expected,
    message: payload.message,
  });
  safeCapture("chat_blocked", properties);
}
/**
 * Record a classified chat failure with full classifier context (flow,
 * kind, severity, expectedness). `chatId` is null when no thread exists.
 */
export function trackChatErrorDetailed(
  searchSpaceId: number,
  chatId: number | null,
  payload: ChatFailureTelemetry
) {
  const properties = compact({
    search_space_id: searchSpaceId,
    chat_id: chatId ?? undefined,
    flow: payload.flow,
    kind: payload.kind,
    error_code: payload.error_code,
    severity: payload.severity,
    is_expected: payload.is_expected,
    message: payload.message,
  });
  safeCapture("chat_error", properties);
}
/**
* Track a message sent from the unauthenticated "free" / anonymous chat
* flow. This is intentionally a separate event from `chat_message_sent`