SurfSense/surfsense_web/lib/chat/streaming-state.ts

import type { ThreadMessageLike } from "@assistant-ui/react";

/**
 * One entry of the agent's thinking-step list. It arrives as the payload
 * of a `data-thinking-step` SSE event and is stored inside the
 * `data-thinking-steps` content part. All four fields take part in the
 * change detection done by `areThinkingStepsEqual`.
 */
export interface ThinkingStepData {
	/** Identifier of the step. */
	id: string;
	/** Title of the step. */
	title: string;
	/** Progress state of the step. */
	status: "pending" | "in_progress" | "completed";
	/** Item strings attached to the step (compared in order). */
	items: string[];
}

/**
 * Discriminated union (tagged by `type`) of the parts held in
 * `ContentPartsState.contentParts` for one streamed assistant message.
 */
export type ContentPart =
	// Answer text; grown in place by `appendText`.
	| { type: "text"; text: string }
	// Model reasoning; grown in place by `appendReasoning`.
	| { type: "reasoning"; text: string }
	| {
			// Tool invocation; pushed by `addToolCall`, which also records
			// its array index under `toolCallId` in `toolCallIndices`.
			type: "tool-call";
			toolCallId: string;
			toolName: string;
			args: Record<string, unknown>;
			result?: unknown;
			/**
			 * Live / finalized JSON text for the tool's input arguments.
			 *
			 * - During streaming: accumulated partial JSON text from
			 *   ``tool-input-delta`` events (may be invalid JSON
			 *   mid-stream). assistant-ui's argsText parser tolerates
			 *   invalid JSON gracefully (changelog 0.7.32 / 0.7.78).
			 * - On completion (``tool-input-available``): replaced with
			 *   ``JSON.stringify(input, null, 2)`` so the post-stream
			 *   card renders pretty-printed JSON instead of the
			 *   model's possibly-fragmented formatting.
			 *
			 * Per assistant-ui ``ThreadMessageLike`` precedence
			 * (changelog 0.11.6 ``d318c83``), when ``argsText`` is
			 * supplied it wins over ``JSON.stringify(args)``.
			 */
			argsText?: string;
			/**
			 * Authoritative LangChain ``tool_call.id`` propagated by the backend
			 * via ``langchainToolCallId`` on tool-input-start/available and
			 * tool-output-available events. Used to join a card to the
			 * matching ``AgentActionLog`` row exposed by
			 * ``GET /threads/{id}/actions`` and the streamed
			 * ``data-action-log`` events.
			 */
			langchainToolCallId?: string;
	  }
	| {
			// Snapshot of the thinking-step list. `updateThinkingSteps`
			// replaces the existing part of this type or, when there is
			// none, inserts one at the front of the array.
			type: "data-thinking-steps";
			data: { steps: ThinkingStepData[] };
	  }
	| {
			/**
			 * Between-step separator. Pushed by `addStepSeparator` when
			 * a `start-step` SSE event arrives AFTER the message already
			 * has non-step content. Rendered by `StepSeparatorDataUI`
			 * (see assistant-ui/step-separator.tsx).
			 */
			type: "data-step-separator";
			// `stepIndex` is the number of separators that already
			// existed when this one was pushed (0 for the first).
			data: { stepIndex: number };
	  };

/**
 * Mutable accumulator for the content of the assistant message that is
 * being streamed. The helper functions in this module mutate it in place.
 */
export interface ContentPartsState {
	/** Parts collected so far for the message. */
	contentParts: ContentPart[];
	/**
	 * Index in `contentParts` of the text part that is still receiving
	 * deltas, or `-1` when no text part is open.
	 */
	currentTextPartIndex: number;
	/**
	 * Index in `contentParts` of the reasoning part that is still
	 * receiving deltas, or `-1` when no reasoning part is open.
	 */
	currentReasoningPartIndex: number;
	/** Maps a `toolCallId` to the index of its `tool-call` part in `contentParts`. */
	toolCallIndices: Map<string, number>;
}

/**
 * Structural comparison of two thinking-step lists.
 *
 * @returns `true` only when both lists have the same length and, position
 *   by position, identical `id`, `title`, `status` and `items` (items are
 *   compared in order with strict equality).
 */
function areThinkingStepsEqual(current: ThinkingStepData[], next: ThinkingStepData[]): boolean {
	if (current.length !== next.length) return false;

	for (const [stepIdx, before] of current.entries()) {
		const after = next[stepIdx];
		const sameHeader =
			before.id === after.id && before.title === after.title && before.status === after.status;
		if (!sameHeader || before.items.length !== after.items.length) return false;

		for (const [itemIdx, item] of before.items.entries()) {
			if (item !== after.items[itemIdx]) return false;
		}
	}

	return true;
}

/**
 * Sync the message's `data-thinking-steps` part with the latest step map.
 *
 * @param state - Content-part state of the in-flight message; mutated in place.
 * @param steps - Steps to publish; the part stores the map's values in
 *   iteration order.
 * @returns `true` when `state` was modified, `false` when the existing
 *   part already held an equal step list.
 */
export function updateThinkingSteps(
	state: ContentPartsState,
	steps: Map<string, ThinkingStepData>
): boolean {
	const nextSteps = [...steps.values()];
	const position = state.contentParts.findIndex((part) => part.type === "data-thinking-steps");

	if (position < 0) {
		// No thinking-steps part yet: insert it at the front. Everything
		// already in the array moves one slot to the right, so every
		// tracked index has to follow.
		state.contentParts.unshift({
			type: "data-thinking-steps",
			data: { steps: nextSteps },
		});
		if (state.currentTextPartIndex >= 0) state.currentTextPartIndex += 1;
		if (state.currentReasoningPartIndex >= 0) state.currentReasoningPartIndex += 1;
		state.toolCallIndices.forEach((partIndex, callId) => {
			state.toolCallIndices.set(callId, partIndex + 1);
		});
		return true;
	}

	// A part exists: skip the write when nothing changed so the caller can
	// avoid a pointless re-render.
	const previous = state.contentParts[position];
	const unchanged =
		previous?.type === "data-thinking-steps" &&
		areThinkingStepsEqual(previous.data.steps, nextSteps);
	if (unchanged) return false;

	state.contentParts[position] = {
		type: "data-thinking-steps",
		data: { steps: nextSteps },
	};
	return true;
}

/**
 * Throttle for UI updates during streaming: many `schedule` calls inside
 * one interval collapse into a single invocation of the most recently
 * supplied callback. SSE text-delta events arrive much faster than the
 * user can perceive, so flushing roughly every 50 ms lets React +
 * ReactMarkdown do far fewer reconciliation passes and avoids flicker.
 */
export class FrameBatchedUpdater {
	private static readonly INTERVAL_MS = 50;

	private timerId: ReturnType<typeof setTimeout> | null = null;
	private flusher: (() => void) | null = null;
	private dirty = false;

	/**
	 * Record `flush` as the pending callback and make sure a timer is
	 * running. A timer that is already pending is left alone, so the
	 * callback runs at most once per interval.
	 */
	schedule(flush: () => void): void {
		this.flusher = flush;
		this.dirty = true;
		if (this.timerId !== null) return;

		this.timerId = setTimeout(() => {
			this.timerId = null;
			this.runIfDirty();
		}, FrameBatchedUpdater.INTERVAL_MS);
	}

	/** Run the pending callback right now (call on tool events or stream end). */
	flush(): void {
		this.cancelTimer();
		this.runIfDirty();
	}

	/** Drop the pending callback without running it and stop the timer. */
	dispose(): void {
		this.cancelTimer();
		this.dirty = false;
		this.flusher = null;
	}

	/** Stop the pending timer, if there is one. */
	private cancelTimer(): void {
		if (this.timerId === null) return;
		clearTimeout(this.timerId);
		this.timerId = null;
	}

	/** Invoke the stored callback once if an update is pending. */
	private runIfDirty(): void {
		if (!this.dirty) return;
		this.dirty = false;
		this.flusher?.();
	}
}

/**
 * Add a streamed text delta to the message.
 *
 * The delta is appended to the open text part when there is one;
 * otherwise a new text part is pushed and becomes the open one.
 */
export function appendText(state: ContentPartsState, delta: string): void {
	// Answer text ends any open reasoning block, so the renderer sees two
	// separate parts (collapsed reasoning, then the answer below it).
	if (state.currentReasoningPartIndex >= 0) {
		state.currentReasoningPartIndex = -1;
	}

	const openPart =
		state.currentTextPartIndex >= 0 ? state.contentParts[state.currentTextPartIndex] : undefined;
	if (openPart?.type === "text") {
		openPart.text += delta;
		return;
	}

	// `push` returns the new length, so the fresh part sits at length - 1.
	state.currentTextPartIndex = state.contentParts.push({ type: "text", text: delta }) - 1;
}

/**
 * Add a streamed reasoning delta to the message.
 *
 * Mirror image of `appendText`: the delta extends the open reasoning part
 * when there is one; otherwise a new reasoning part is pushed and becomes
 * the open one. After `endReasoning` the next delta therefore starts a new
 * block.
 */
export function appendReasoning(state: ContentPartsState, delta: string): void {
	// Reasoning arriving ends any open text part.
	if (state.currentTextPartIndex >= 0) {
		state.currentTextPartIndex = -1;
	}

	const openPart =
		state.currentReasoningPartIndex >= 0
			? state.contentParts[state.currentReasoningPartIndex]
			: undefined;
	if (openPart?.type === "reasoning") {
		openPart.text += delta;
		return;
	}

	// `push` returns the new length, so the fresh part sits at length - 1.
	state.currentReasoningPartIndex =
		state.contentParts.push({ type: "reasoning", text: delta }) - 1;
}

/**
 * Close the open reasoning block, if any. The parts themselves are left
 * untouched; only the "open part" pointer is reset, so a later reasoning
 * delta starts a new block.
 */
export function endReasoning(state: ContentPartsState): void {
	const NO_OPEN_PART = -1;
	state.currentReasoningPartIndex = NO_OPEN_PART;
}

/**
 * Push a divider between consecutive model steps of one assistant turn.
 *
 * Nothing is pushed when the message has no text / reasoning / tool-call
 * part yet (so the first step of a turn gets no leading divider) or when
 * the last part is already a divider (guards against duplicate
 * `start-step` events). A pushed divider closes any open text and
 * reasoning part.
 */
export function addStepSeparator(state: ContentPartsState): void {
	const parts = state.contentParts;
	const contentTypes = new Set<string>(["text", "reasoning", "tool-call"]);

	if (!parts.some((part) => contentTypes.has(part.type))) return;
	if (parts[parts.length - 1]?.type === "data-step-separator") return;

	// The new divider's index is the number of dividers already present.
	const stepIndex = parts.reduce(
		(count, part) => (part.type === "data-step-separator" ? count + 1 : count),
		0
	);
	parts.push({ type: "data-step-separator", data: { stepIndex } });
	state.currentTextPartIndex = -1;
	state.currentReasoningPartIndex = -1;
}

/**
 * Allowlist of tool names that should produce a UI tool card: either a
 * ``Set`` of permitted tool names or the sentinel ``"all"``, which
 * matches every tool. We dropped the legacy ``BASE_TOOLS_WITH_UI`` gate
 * so that ALL tool calls render via the generic ``ToolFallback``. The
 * backend's ``format_thinking_step`` summarisation and the defensive
 * ``result_length``-only default for unknown tools keep persisted
 * message JSON from ballooning.
 */
export type ToolUIGate = Set<string> | "all";

/**
 * Decide whether `toolName` is let through by `gate`: always for the
 * `"all"` sentinel, otherwise only when the set contains the name.
 */
function _toolPasses(gate: ToolUIGate, toolName: string): boolean {
	if (gate === "all") {
		return true;
	}
	return gate.has(toolName);
}

/**
 * Register a new `tool-call` part for a tool invocation.
 *
 * @param state - Content-part state of the in-flight message; mutated in place.
 * @param toolsWithUI - Gate deciding which tool names get a card.
 * @param toolCallId - Id the card is indexed under in `toolCallIndices`.
 * @param toolName - Name of the invoked tool.
 * @param args - Tool input known at registration time.
 * @param force - When `true`, add the card even if the gate rejects the tool.
 * @param langchainToolCallId - Stored on the part only when truthy.
 *
 * Does nothing when the gate rejects the tool and `force` is `false`.
 * Adding a card closes any open text and reasoning part.
 */
export function addToolCall(
	state: ContentPartsState,
	toolsWithUI: ToolUIGate,
	toolCallId: string,
	toolName: string,
	args: Record<string, unknown>,
	force = false,
	langchainToolCallId?: string
): void {
	if (!force && !_toolPasses(toolsWithUI, toolName)) return;

	const card: {
		type: "tool-call";
		toolCallId: string;
		toolName: string;
		args: Record<string, unknown>;
		langchainToolCallId?: string;
	} = { type: "tool-call", toolCallId, toolName, args };
	// Leave the key off entirely (rather than storing undefined / "") when
	// no id was supplied.
	if (langchainToolCallId) {
		card.langchainToolCallId = langchainToolCallId;
	}

	// `push` returns the new length, so the card sits at length - 1.
	const cardIndex = state.contentParts.push(card) - 1;
	state.toolCallIndices.set(toolCallId, cardIndex);
	state.currentTextPartIndex = -1;
	state.currentReasoningPartIndex = -1;
}

/**
 * Reverse lookup used by the SSE ``data-action-log`` handler: map a
 * LangChain ``tool_call.id`` (stored on a part as
 * ``langchainToolCallId``) back to the synthetic ``toolCallId`` the chat
 * tool card uses (``call_<run-id>``).
 *
 * @returns The ``toolCallId`` of the first matching ``tool-call`` part,
 *   or ``null`` when no card carries that id yet — the action is still
 *   recorded in the LC-id-keyed atom so the card can pick it up once it
 *   arrives.
 */
export function findToolCallIdByLcId(
	state: ContentPartsState,
	lcToolCallId: string
): string | null {
	const match = state.contentParts.find(
		(part) => part.type === "tool-call" && part.langchainToolCallId === lcToolCallId
	);
	return match?.type === "tool-call" ? match.toolCallId : null;
}

/**
 * Patch the `tool-call` part registered under `toolCallId`.
 *
 * Silently does nothing when the id is not in `toolCallIndices` or the
 * indexed part is not a `tool-call`.
 *
 * @param update - Fields to apply:
 *   - `args`: applied when truthy.
 *   - `argsText`: applied whenever it is not `undefined`, so an explicit
 *     `""` clears the text and a finalized `"{}"` is kept.
 *   - `result`: applied whenever it is not `undefined` (`null` included).
 *   - `langchainToolCallId`: backfill only — see the comment in the body.
 */
export function updateToolCall(
	state: ContentPartsState,
	toolCallId: string,
	update: {
		args?: Record<string, unknown>;
		argsText?: string;
		result?: unknown;
		langchainToolCallId?: string;
	}
): void {
	const slot = state.toolCallIndices.get(toolCallId);
	if (slot === undefined) return;

	const card = state.contentParts[slot];
	if (card?.type !== "tool-call") return;

	const { args, argsText, result, langchainToolCallId } = update;
	if (args) card.args = args;
	// Deliberately `!== undefined` rather than a truthy check.
	if (argsText !== undefined) card.argsText = argsText;
	if (result !== undefined) card.result = result;
	// Backfill only: the first non-empty id wins. An empty / missing late
	// value never wipes a known id, and a different late value does not
	// replace one either.
	// NOTE(review): the previous comment also said the ``on_tool_end``
	// value "should override" an earlier match, which this condition does
	// not do — confirm which behaviour is intended.
	if (langchainToolCallId && !card.langchainToolCallId) {
		card.langchainToolCallId = langchainToolCallId;
	}
}

/**
 * Concatenate a streamed argument chunk onto the ``argsText`` of the
 * tool call registered under ``toolCallId``; a missing ``argsText``
 * starts from the empty string.
 *
 * Does nothing when the id has no registered card or the indexed part is
 * not a ``tool-call`` (the matching ``tool-input-start`` either lost the
 * wire race or this id never had a card — either way the deltas have
 * nowhere safe to land).
 */
export function appendToolInputDelta(
	state: ContentPartsState,
	toolCallId: string,
	delta: string
): void {
	const slot = state.toolCallIndices.get(toolCallId);
	const card = slot === undefined ? undefined : state.contentParts[slot];
	if (card?.type === "tool-call") {
		const soFar = card.argsText ?? "";
		card.argsText = soFar + delta;
	}
}

/**
 * Tell whether `part` is a `tool-call` whose `result` is a non-null
 * object carrying an `__interrupt__` key.
 */
function _hasInterruptResult(part: ContentPart): boolean {
	if (part.type === "tool-call") {
		const outcome: unknown = part.result;
		if (typeof outcome === "object" && outcome !== null) {
			return "__interrupt__" in outcome;
		}
	}
	return false;
}

/**
 * Produce the content array handed to assistant-ui for rendering.
 *
 * Kept parts: non-empty text and reasoning, every thinking-steps and
 * step-separator part, and tool calls that either pass `toolsWithUI` or
 * carry an interrupt result. When nothing survives, a single empty text
 * part is returned instead of an empty array.
 */
export function buildContentForUI(
	state: ContentPartsState,
	toolsWithUI: ToolUIGate
): ThreadMessageLike["content"] {
	const visible = state.contentParts.filter((part) => {
		switch (part.type) {
			case "text":
			case "reasoning":
				return part.text.length > 0;
			case "tool-call":
				// An interrupt result keeps the card even for tools the
				// gate would otherwise hide.
				return _toolPasses(toolsWithUI, part.toolName) || _hasInterruptResult(part);
			case "data-thinking-steps":
			case "data-step-separator":
				return true;
			default:
				return false;
		}
	});

	if (visible.length === 0) {
		return [{ type: "text", text: "" }];
	}
	return visible as ThreadMessageLike["content"];
}

/**
 * Produce the content array that gets persisted for the message.
 *
 * Kept parts: non-empty text and reasoning, every thinking-steps and
 * step-separator part, and tool calls that either pass `toolsWithUI` or
 * carry an interrupt result. Parts are stored by reference, not copied.
 * When nothing survives, a single empty text part is returned instead of
 * an empty array.
 */
export function buildContentForPersistence(
	state: ContentPartsState,
	toolsWithUI: ToolUIGate
): unknown[] {
	const persisted: unknown[] = [];

	for (const part of state.contentParts) {
		let keep: boolean;
		switch (part.type) {
			case "text":
				keep = part.text.length > 0;
				break;
			case "reasoning":
				// Reasoning blocks are persisted so a chat reload
				// re-renders the collapsed thinking section instead of
				// silently dropping it (same idea as the
				// data-thinking-steps case below).
				keep = part.text.length > 0;
				break;
			case "tool-call":
				keep = _toolPasses(toolsWithUI, part.toolName) || _hasInterruptResult(part);
				break;
			case "data-thinking-steps":
			case "data-step-separator":
				keep = true;
				break;
			default:
				keep = false;
		}
		if (keep) persisted.push(part);
	}

	if (persisted.length === 0) {
		return [{ type: "text", text: "" }];
	}
	return persisted;
}

/**
 * Discriminated union (tagged by `type`) describing the JSON payloads
 * that `readSSEStream` yields.
 *
 * NOTE(review): `readSSEStream` does not validate payloads against these
 * shapes at runtime, so this type documents what the backend is expected
 * to send rather than a checked guarantee.
 */
export type SSEEvent =
	// Message-level and step-level start / finish markers.
	| { type: "start"; messageId?: string }
	| { type: "finish" }
	| { type: "start-step" }
	| { type: "finish-step" }
	// Text stream: start / delta / end. Only the delta carries content.
	| { type: "text-start"; id: string }
	| { type: "text-delta"; id?: string; delta: string }
	| { type: "text-end"; id: string }
	// Reasoning stream: start / delta / end. Only the delta carries content.
	| { type: "reasoning-start"; id: string }
	| { type: "reasoning-delta"; id?: string; delta: string }
	| { type: "reasoning-end"; id: string }
	| {
			type: "tool-input-start";
			toolCallId: string;
			toolName: string;
			/** Authoritative LangChain ``tool_call.id``. Optional. */
			langchainToolCallId?: string;
	  }
	| {
			/**
			 * Live tool-call argument delta. Concatenated into
			 * ``argsText`` on the matching ``tool-call`` content part
			 * by ``appendToolInputDelta``. parity_v2 only — the legacy
			 * code path emits ``tool-input-available`` without prior
			 * deltas.
			 */
			type: "tool-input-delta";
			toolCallId: string;
			inputTextDelta: string;
	  }
	| {
			// Complete tool input as a parsed object.
			type: "tool-input-available";
			toolCallId: string;
			toolName: string;
			input: Record<string, unknown>;
			langchainToolCallId?: string;
	  }
	| {
			// Tool output for a previously announced tool call.
			type: "tool-output-available";
			toolCallId: string;
			output: Record<string, unknown>;
			/** Authoritative LangChain ``tool_call.id`` extracted from
			 * ``ToolMessage.tool_call_id`` at on_tool_end. Backfills cards
			 * that didn't get the id at tool-input-start time. */
			langchainToolCallId?: string;
	  }
	// One thinking step per event.
	| { type: "data-thinking-step"; data: ThinkingStepData }
	| { type: "data-thread-title-update"; data: { threadId: number; title: string } }
	// The next two payloads are untyped here; their schema is defined by
	// the backend.
	| { type: "data-interrupt-request"; data: Record<string, unknown> }
	| { type: "data-documents-updated"; data: Record<string, unknown> }
	| {
			/**
			 * A freshly committed AgentActionLog row. Frontend stores
			 * this in a Map keyed off ``lc_tool_call_id`` so the chat
			 * tool card can light up its Revert button.
			 */
			type: "data-action-log";
			data: {
				id: number;
				lc_tool_call_id: string | null;
				chat_turn_id: string | null;
				tool_name: string;
				reversible: boolean;
				reverse_descriptor_present: boolean;
				created_at: string | null;
				error: boolean;
			};
	  }
	| {
			/**
			 * Reversibility flipped (filesystem op SAVEPOINT committed;
			 * cf. ``kb_persistence._dispatch_reversibility_update``).
			 */
			type: "data-action-log-updated";
			data: { id: number; reversible: boolean };
	  }
	| {
			/**
			 * Emitted at the start of every stream so the frontend can
			 * stamp the per-turn correlation id onto the in-flight
			 * assistant message and replay it via
			 * ``appendMessage``. Pure-text turns never produce
			 * action-log events; this event guarantees the frontend
			 * always learns the turn id.
			 */
			type: "data-turn-info";
			data: { chat_turn_id: string };
	  }
	| {
			/**
			 * Best-effort revert pass that ran BEFORE this regeneration.
			 * Per-action results are forwarded to the UI so the user
			 * can see which downstream actions were rolled
			 * back vs which couldn't be undone.
			 */
			type: "data-revert-results";
			data: {
				status: "ok" | "partial";
				chat_turn_ids: string[];
				total: number;
				reverted: number;
				already_reverted: number;
				not_reversible: number;
				/**
				 * ``permission_denied`` and ``skipped`` are first-class
				 * counters so the response invariant
				 * ``total === sum(counters)`` always holds. Optional
				 * for forward compatibility with older backends; the
				 * frontend treats missing values as ``0``.
				 */
				permission_denied?: number;
				failed: number;
				skipped?: number;
				// One entry per action; `status` uses the same vocabulary
				// as the counters above.
				results: Array<{
					action_id: number;
					tool_name: string;
					status:
						| "reverted"
						| "already_reverted"
						| "not_reversible"
						| "permission_denied"
						| "failed"
						| "skipped";
					message?: string | null;
					new_action_id?: number | null;
					error?: string | null;
				}>;
			};
	  }
	| {
			// Turn status, optionally with retry hints.
			// NOTE(review): units / epoch of `retry_after_at` are not
			// visible in this file — confirm against the backend.
			type: "data-turn-status";
			data: {
				status: "idle" | "busy" | "cancelling";
				retry_after_ms?: number;
				retry_after_at?: number;
			};
	  }
	| {
			// Token accounting: a `usage` record of per-key counters,
			// overall totals, and a `call_details` list with a `model`
			// name per entry.
			type: "data-token-usage";
			data: {
				usage: Record<
					string,
					{ prompt_tokens: number; completion_tokens: number; total_tokens: number }
				>;
				prompt_tokens: number;
				completion_tokens: number;
				total_tokens: number;
				call_details: Array<{
					model: string;
					prompt_tokens: number;
					completion_tokens: number;
					total_tokens: number;
				}>;
			};
	  }
	// Error reported in-band on the stream; `errorCode` is optional.
	| { type: "error"; errorText: string; errorCode?: string };

/**
 * Extract the JSON events carried by the `data:` lines of one SSE block.
 * Blank payloads, the `[DONE]` sentinel, malformed JSON and payloads that
 * are not plain JSON objects are skipped.
 */
function _parseSSEBlock(block: string): SSEEvent[] {
	const parsed: SSEEvent[] = [];

	for (const line of block.split(/\r?\n/)) {
		// The SSE format makes the space after the colon optional.
		if (!line.startsWith("data:")) continue;
		const payload = line.slice("data:".length).trim();
		if (!payload || payload === "[DONE]") continue;

		let value: unknown;
		try {
			value = JSON.parse(payload);
		} catch (e) {
			if (e instanceof SyntaxError) continue;
			throw e;
		}

		// Every SSEEvent is an object with a `type`; `null`, arrays and
		// primitives would only crash consumers that read `event.type`.
		if (typeof value === "object" && value !== null && !Array.isArray(value)) {
			parsed.push(value as SSEEvent);
		}
	}

	return parsed;
}

/**
 * Async generator that reads an SSE response body and yields the parsed
 * JSON object of every `data:` line.
 *
 * - Buffers across chunk boundaries and splits events on blank lines
 *   (`\n\n` or `\r\n\r\n`).
 * - Skips malformed JSON, non-object payloads and `[DONE]` lines.
 * - At end of stream the decoder is flushed and the remaining buffer is
 *   parsed too, so a final event that is not followed by a blank line is
 *   still delivered. A truncated JSON object can never parse, so a
 *   half-received event is still dropped.
 * - If the consumer stops early (`break`, `return`, or an exception) the
 *   body stream is cancelled so the connection does not stay open.
 *
 * @param response - Fetch response whose body is an SSE stream.
 * @throws Error("No response body") when `response.body` is missing.
 *   Errors from reading the stream propagate to the consumer.
 */
export async function* readSSEStream(response: Response): AsyncGenerator<SSEEvent> {
	if (!response.body) {
		throw new Error("No response body");
	}

	const reader = response.body.getReader();
	const decoder = new TextDecoder();
	const eventBoundary = /\r?\n\r?\n/;
	let buffer = "";
	let streamEnded = false;

	try {
		while (true) {
			const { done, value } = await reader.read();
			if (done) break;

			buffer += decoder.decode(value, { stream: true });
			const blocks = buffer.split(eventBoundary);
			// The last piece may be an incomplete event; keep it for the
			// next chunk.
			buffer = blocks.pop() ?? "";

			for (const block of blocks) {
				yield* _parseSSEBlock(block);
			}
		}

		streamEnded = true;
		// Flush bytes the decoder was still holding, then handle a final
		// event that was not terminated by a blank line.
		buffer += decoder.decode();
		yield* _parseSSEBlock(buffer);
	} finally {
		if (!streamEnded) {
			// Early exit: tell the source we are done. Best effort — an
			// already-errored stream rejects here, and that must not
			// mask the original error.
			await reader.cancel().catch(() => undefined);
		}
		reader.releaseLock();
	}
}