feat(story-3.5): add cloud-mode LLM model selection with token quota enforcement

Implement the system-managed model catalog, subscription tier enforcement, atomic token quota tracking, and conditional frontend rendering for cloud vs. self-hosted mode. Apply all 20 BMAD code review patches, including security fixes (cross-user API key hijack), race-condition mitigation (atomic SQL UPDATE), and handling of quota errors that arrive mid-stream over SSE.

Co-Authored-By: Claude Sonnet 4 <noreply@anthropic.com>
2026-06-24 21:38:09 +02:00 · 2026-04-14 17:01:21 +07:00 · 2026-04-14 17:01:21 +07:00 · c1776b3ec8
commit c1776b3ec8
parent e7382b26de
19 changed files with 1003 additions and 34 deletions
--- a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx
@ -14,6 +14,8 @@ import { useCallback, useEffect, useMemo, useRef, useState } from "react";
 import { toast } from "sonner";
 import { z } from "zod";
 import { disabledToolsAtom } from "@/atoms/agent-tools/agent-tools.atoms";
+import { selectedSystemModelIdAtom } from "@/atoms/new-llm-config/system-models-query.atoms";
+import { isCloud } from "@/lib/env-config";
 import {
 	clearTargetCommentIdAtom,
 	currentThreadAtom,
@ -173,6 +175,16 @@ function extractMentionedDocuments(content: unknown): MentionedDocumentInfo[] {
 	return [];
 }

+/**
+ * Raised when the user's token quota is exhausted.
+ *
+ * Thrown both when the backend answers with HTTP 402 and when a quota-related
+ * "error" event arrives mid-stream, so catch sites can recognise it with
+ * `instanceof` and show the upgrade prompt instead of a generic failure.
+ */
+class QuotaExceededError extends Error {
+	constructor() {
+		super("Token quota exceeded");
+		this.name = "QuotaExceededError";
+		// Restore the prototype chain so `instanceof QuotaExceededError` keeps
+		// working even if the class is down-levelled to ES5.
+		Object.setPrototypeOf(this, new.target.prototype);
+	}
+}
+
 /**
 * Tools that should render custom UI in the chat.
 */
@ -230,6 +242,9 @@ export default function NewChatPage() {
 	// Get disabled tools from the tool toggle UI
 	const disabledTools = useAtomValue(disabledToolsAtom);

+	// Cloud mode: selected system model ID (null = backend default)
+	const selectedSystemModelId = useAtomValue(selectedSystemModelIdAtom);
+
 	// Get mentioned document IDs from the composer (derived from @ mentions + sidebar selections)
 	const mentionedDocumentIds = useAtomValue(mentionedDocumentIdsAtom);
 	const mentionedDocuments = useAtomValue(mentionedDocumentsAtom);
@ -704,11 +719,13 @@ export default function NewChatPage() {
 							? mentionedDocumentIds.surfsense_doc_ids
 							: undefined,
 						disabled_tools: disabledTools.length > 0 ? disabledTools : undefined,
+						...(isCloud() && selectedSystemModelId != null && { model_id: selectedSystemModelId }),
 					}),
 					signal: controller.signal,
 				});

 				if (!response.ok) {
+					if (response.status === 402) throw new QuotaExceededError();
 					throw new Error(`Backend error: ${response.status}`);
 				}

@ -847,6 +864,9 @@ export default function NewChatPage() {
 						}

 						case "error":
+							if (parsed.errorText?.includes("quota") || parsed.errorText?.includes("token_quota_exceeded")) {
+								throw new QuotaExceededError();
+							}
 							throw new Error(parsed.errorText || "Server error");
 					}
 				}
@ -909,6 +929,15 @@ export default function NewChatPage() {
 					}
 					return;
 				}
+				if (error instanceof QuotaExceededError) {
+					toast.error("Monthly token quota exceeded. Upgrade your plan to continue.", {
+						action: {
+							label: "Upgrade",
+							onClick: () => window.open("/pricing", "_blank"),
+						},
+					});
+					return;
+				}
 				console.error("[NewChatPage] Chat error:", error);

 				// Track chat error
@ -955,6 +984,7 @@ export default function NewChatPage() {
 			currentUser,
 			disabledTools,
 			updateChatTabTitle,
+			selectedSystemModelId,
 		]
 	);

@ -1062,11 +1092,13 @@ export default function NewChatPage() {
 					body: JSON.stringify({
 						search_space_id: searchSpaceId,
 						decisions,
+						...(isCloud() && selectedSystemModelId != null && { model_id: selectedSystemModelId }),
 					}),
 					signal: controller.signal,
 				});

 				if (!response.ok) {
+					if (response.status === 402) throw new QuotaExceededError();
 					throw new Error(`Backend error: ${response.status}`);
 				}

@ -1175,6 +1207,9 @@ export default function NewChatPage() {
 						}

 						case "error":
+							if (parsed.errorText?.includes("quota") || parsed.errorText?.includes("token_quota_exceeded")) {
+								throw new QuotaExceededError();
+							}
 							throw new Error(parsed.errorText || "Server error");
 					}
 				}
@ -1201,6 +1236,15 @@ export default function NewChatPage() {
 				if (error instanceof Error && error.name === "AbortError") {
 					return;
 				}
+				if (error instanceof QuotaExceededError) {
+					toast.error("Monthly token quota exceeded. Upgrade your plan to continue.", {
+						action: {
+							label: "Upgrade",
+							onClick: () => window.open("/pricing", "_blank"),
+						},
+					});
+					return;
+				}
 				console.error("[NewChatPage] Resume error:", error);
 				toast.error("Failed to resume. Please try again.");
 			} finally {
@ -1380,11 +1424,13 @@ export default function NewChatPage() {
 						search_space_id: searchSpaceId,
 						user_query: newUserQuery || null,
 						disabled_tools: disabledTools.length > 0 ? disabledTools : undefined,
+						...(isCloud() && selectedSystemModelId != null && { model_id: selectedSystemModelId }),
 					}),
 					signal: controller.signal,
 				});

 				if (!response.ok) {
+					if (response.status === 402) throw new QuotaExceededError();
 					throw new Error(`Backend error: ${response.status}`);
 				}

@ -1454,6 +1500,9 @@ export default function NewChatPage() {
 						}

 						case "error":
+							if (parsed.errorText?.includes("quota") || parsed.errorText?.includes("token_quota_exceeded")) {
+								throw new QuotaExceededError();
+							}
 							throw new Error(parsed.errorText || "Server error");
 					}
 				}
@ -1502,6 +1551,15 @@ export default function NewChatPage() {
 					return;
 				}
 				batcher.dispose();
+				if (error instanceof QuotaExceededError) {
+					toast.error("Monthly token quota exceeded. Upgrade your plan to continue.", {
+						action: {
+							label: "Upgrade",
+							onClick: () => window.open("/pricing", "_blank"),
+						},
+					});
+					return;
+				}
 				console.error("[NewChatPage] Regeneration error:", error);
 				trackChatError(
 					searchSpaceId,
@ -1524,7 +1582,7 @@ export default function NewChatPage() {
 				abortControllerRef.current = null;
 			}
 		},
-		[threadId, searchSpaceId, messages, disabledTools]
+		[threadId, searchSpaceId, messages, disabledTools, selectedSystemModelId]
 	);

 	// Handle editing a message - truncates history and regenerates with new query
--- a/surfsense_web/atoms/new-llm-config/system-models-query.atoms.ts
+++ b/surfsense_web/atoms/new-llm-config/system-models-query.atoms.ts
@ -0,0 +1,30 @@
+import { atom } from "jotai";
+import { atomWithQuery } from "jotai-tanstack-query";
+import { newLLMConfigApiService } from "@/lib/apis/new-llm-config-api.service";
+import { isCloud } from "@/lib/env-config";
+import { cacheKeys } from "@/lib/query-client/cache-keys";
+
+/**
+ * Query atom exposing the system-managed LLM catalogue.
+ *
+ * The query is enabled only when `isCloud()` returns true, so no request is
+ * made in other deployment modes. Per the API contract the returned models
+ * carry negative IDs.
+ */
+export const systemModelsAtom = atomWithQuery(() => ({
+	queryKey: cacheKeys.systemModels.all(),
+	// Only fetch when in cloud mode.
+	enabled: isCloud(),
+	// 10 minutes - system models rarely change.
+	staleTime: 10 * 60 * 1000,
+	queryFn: () => newLLMConfigApiService.getSystemModels(),
+}));
+
+/**
+ * Atom holding the currently selected system model ID (negative integer).
+ * null means no explicit selection — chat requests then omit `model_id`,
+ * leaving the choice of model to the backend.
+ *
+ * NOTE: This is a global atom, so the selection is shared by every component
+ * that reads it within a session. ChatHeader resets it to null when it mounts
+ * and whenever its `searchSpaceId` prop changes.
+ */
+export const selectedSystemModelIdAtom = atom<number | null>(null);
--- a/surfsense_web/components/new-chat/chat-header.tsx
+++ b/surfsense_web/components/new-chat/chat-header.tsx
@ -1,6 +1,8 @@
 "use client";

-import { useCallback, useState } from "react";
+import { useCallback, useEffect, useState } from "react";
+import { useSetAtom } from "jotai";
+import { selectedSystemModelIdAtom } from "@/atoms/new-llm-config/system-models-query.atoms";
 import { ImageConfigDialog } from "@/components/shared/image-config-dialog";
 import { ModelConfigDialog } from "@/components/shared/model-config-dialog";
 import { VisionConfigDialog } from "@/components/shared/vision-config-dialog";
@ -12,7 +14,9 @@ import type {
 	NewLLMConfigPublic,
 	VisionLLMConfig,
 } from "@/contracts/types/new-llm-config.types";
+import { isCloud } from "@/lib/env-config";
 import { ModelSelector } from "./model-selector";
+import { SystemModelSelector } from "./system-model-selector";

 interface ChatHeaderProps {
 	searchSpaceId: number;
@ -20,6 +24,12 @@ interface ChatHeaderProps {
 }

 export function ChatHeader({ searchSpaceId, className }: ChatHeaderProps) {
+	// Reset system model selection when search space changes
+	const setSelectedSystemModelId = useSetAtom(selectedSystemModelIdAtom);
+	useEffect(() => {
+		setSelectedSystemModelId(null);
+	}, [searchSpaceId, setSelectedSystemModelId]);
+
 	// LLM config dialog state
 	const [dialogOpen, setDialogOpen] = useState(false);
 	const [selectedConfig, setSelectedConfig] = useState<
@ -115,15 +125,19 @@ export function ChatHeader({ searchSpaceId, className }: ChatHeaderProps) {

 	return (
 		<div className="flex items-center gap-2">
-			<ModelSelector
-				onEditLLM={handleEditLLMConfig}
-				onAddNewLLM={handleAddNewLLM}
-				onEditImage={handleEditImageConfig}
-				onAddNewImage={handleAddImageModel}
-				onEditVision={handleEditVisionConfig}
-				onAddNewVision={handleAddVisionModel}
-				className={className}
-			/>
+			{isCloud() ? (
+				<SystemModelSelector className={className} />
+			) : (
+				<ModelSelector
+					onEditLLM={handleEditLLMConfig}
+					onAddNewLLM={handleAddNewLLM}
+					onEditImage={handleEditImageConfig}
+					onAddNewImage={handleAddImageModel}
+					onEditVision={handleEditVisionConfig}
+					onAddNewVision={handleAddVisionModel}
+					className={className}
+				/>
+			)}
 			<ModelConfigDialog
 				open={dialogOpen}
 				onOpenChange={handleDialogClose}
--- a/surfsense_web/components/new-chat/system-model-selector.tsx
+++ b/surfsense_web/components/new-chat/system-model-selector.tsx
@ -0,0 +1,148 @@
+"use client";
+
+import { useAtom, useAtomValue } from "jotai";
+import { Bot, Check, ChevronDown, Crown, Zap } from "lucide-react";
+import { useState } from "react";
+import {
+	selectedSystemModelIdAtom,
+	systemModelsAtom,
+} from "@/atoms/new-llm-config/system-models-query.atoms";
+import { Badge } from "@/components/ui/badge";
+import { Button } from "@/components/ui/button";
+import {
+	Command,
+	CommandEmpty,
+	CommandGroup,
+	CommandInput,
+	CommandItem,
+	CommandList,
+} from "@/components/ui/command";
+import { Popover, PopoverContent, PopoverTrigger } from "@/components/ui/popover";
+import { Spinner } from "@/components/ui/spinner";
+import type { SystemModelItem } from "@/contracts/types/new-llm-config.types";
+import { cn } from "@/lib/utils";
+
+/** Props accepted by SystemModelSelector. */
+interface SystemModelSelectorProps {
+	/** Extra class names merged onto the trigger button. */
+	className?: string;
+}
+
+/**
+ * Badge presentation (label, icon, Badge variant) for each known tier.
+ * Keys are lowercase; TierBadge lowercases the incoming tier before the lookup.
+ */
+const TIER_CONFIG: Record<string, { label: string; icon: React.ComponentType<{ className?: string }>; variant: "default" | "secondary" | "outline" }> = {
+	free: { label: "Free", icon: Zap, variant: "secondary" },
+	pro: { label: "Pro", icon: Crown, variant: "default" },
+	enterprise: { label: "Enterprise", icon: Crown, variant: "default" },
+};
+
+/**
+ * Small badge showing which subscription tier a model requires.
+ *
+ * The tier is matched case-insensitively against TIER_CONFIG; a tier with no
+ * entry is rendered verbatim with the Zap icon and the "outline" variant.
+ */
+function TierBadge({ tier }: { tier: string }) {
+	const known = TIER_CONFIG[tier.toLowerCase()];
+	const { label, icon: TierIcon, variant } = known ?? {
+		label: tier,
+		icon: Zap,
+		variant: "outline" as const,
+	};
+	return (
+		<Badge
+			variant={variant}
+			className="ml-auto flex items-center gap-1 text-[10px] px-1.5 py-0 h-4"
+		>
+			<TierIcon className="h-2.5 w-2.5" />
+			{label}
+		</Badge>
+	);
+}
+
+/**
+ * Cloud-mode model picker: a popover listing the system-managed models.
+ *
+ * Reads the catalogue from `systemModelsAtom` and stores the user's choice in
+ * `selectedSystemModelIdAtom`. While nothing is explicitly selected, the first
+ * catalogue entry is shown (and check-marked) as a visual default only — the
+ * atom itself stays null until the user picks a model.
+ *
+ * @param className - Extra class names merged onto the trigger button.
+ */
+export function SystemModelSelector({ className }: SystemModelSelectorProps) {
+	const [open, setOpen] = useState(false);
+	const [searchQuery, setSearchQuery] = useState("");
+	const { data: models, isPending } = useAtomValue(systemModelsAtom);
+	const [selectedId, setSelectedId] = useAtom(selectedSystemModelIdAtom);
+
+	const selectedModel: SystemModelItem | undefined =
+		selectedId != null ? models?.find((m) => m.id === selectedId) : undefined;
+
+	// Display-only fallback: show the first catalogue entry when there is no
+	// matching selection. Undefined while the catalogue is missing or empty.
+	const displayModel = selectedModel ?? models?.[0];
+
+	// Case-insensitive match on name, provider, or model identifier. The query
+	// is lowercased once here rather than once per field for every model.
+	const needle = searchQuery.toLowerCase();
+	const filteredModels =
+		models?.filter(
+			(m) =>
+				!needle ||
+				m.name.toLowerCase().includes(needle) ||
+				m.provider.toLowerCase().includes(needle) ||
+				m.model_name.toLowerCase().includes(needle)
+		) ?? [];
+
+	/** Store the chosen model, close the popover, and clear the search box. */
+	function handleSelect(model: SystemModelItem) {
+		setSelectedId(model.id);
+		setOpen(false);
+		setSearchQuery("");
+	}
+
+	return (
+		<Popover open={open} onOpenChange={setOpen}>
+			<PopoverTrigger asChild>
+				<Button
+					variant="outline"
+					size="sm"
+					className={cn(
+						"flex items-center gap-2 h-8 px-3 text-sm font-normal",
+						className
+					)}
+					aria-label="Select AI model"
+				>
+					<Bot className="h-4 w-4 shrink-0 text-muted-foreground" />
+					{isPending ? (
+						<Spinner className="h-3 w-3" />
+					) : displayModel ? (
+						<span className="max-w-[140px] truncate">{displayModel.name}</span>
+					) : (
+						<span className="text-muted-foreground">Select model</span>
+					)}
+					<ChevronDown className="h-3 w-3 shrink-0 text-muted-foreground ml-1" />
+				</Button>
+			</PopoverTrigger>
+			<PopoverContent className="w-72 p-0" align="start">
+				{/* Filtering is done above, so cmdk's built-in filter is turned off. */}
+				<Command shouldFilter={false}>
+					<CommandInput
+						placeholder="Search models…"
+						value={searchQuery}
+						onValueChange={setSearchQuery}
+					/>
+					<CommandList className="max-h-64">
+						{isPending ? (
+							<div className="flex items-center justify-center py-6">
+								<Spinner className="h-5 w-5" />
+							</div>
+						) : filteredModels.length === 0 ? (
+							<CommandEmpty>No models found.</CommandEmpty>
+						) : (
+							<CommandGroup>
+								{filteredModels.map((model) => {
+									// Check-mark the explicit selection, or the displayed
+									// fallback while nothing has been selected yet.
+									const isSelected =
+										selectedId === model.id ||
+										(selectedId === null && displayModel?.id === model.id);
+									return (
+										<CommandItem
+											key={model.id}
+											value={String(model.id)}
+											onSelect={() => handleSelect(model)}
+											className="flex items-center gap-2 cursor-pointer"
+										>
+											<Check
+												className={cn(
+													"h-3.5 w-3.5 shrink-0",
+													isSelected ? "opacity-100" : "opacity-0"
+												)}
+											/>
+											<div className="flex flex-col flex-1 min-w-0">
+												<span className="truncate font-medium text-sm">{model.name}</span>
+												<span className="truncate text-[11px] text-muted-foreground">
+													{model.model_name}
+												</span>
+											</div>
+											<TierBadge tier={model.tier_required} />
+										</CommandItem>
+									);
+								})}
+							</CommandGroup>
+						)}
+					</CommandList>
+				</Command>
+			</PopoverContent>
+		</Popover>
+	);
+}
--- a/surfsense_web/components/settings/search-space-settings-dialog.tsx
+++ b/surfsense_web/components/settings/search-space-settings-dialog.tsx
@ -17,6 +17,7 @@ import { useTranslations } from "next-intl";
 import type React from "react";
 import { searchSpaceSettingsDialogAtom } from "@/atoms/settings/settings-dialog.atoms";
 import { SettingsDialog } from "@/components/settings/settings-dialog";
+import { isCloud } from "@/lib/env-config";

 const GeneralSettingsManager = dynamic(
 	() =>
@ -85,20 +86,27 @@ export function SearchSpaceSettingsDialog({ searchSpaceId }: SearchSpaceSettings
 	const t = useTranslations("searchSpaceSettings");
 	const [state, setState] = useAtom(searchSpaceSettingsDialogAtom);

+	const cloudMode = isCloud();
+
 	const navItems = [
 		{ value: "general", label: t("nav_general"), icon: <CircleUser className="h-4 w-4" /> },
 		{ value: "roles", label: t("nav_role_assignments"), icon: <ListChecks className="h-4 w-4" /> },
-		{ value: "models", label: t("nav_agent_configs"), icon: <Bot className="h-4 w-4" /> },
-		{
-			value: "image-models",
-			label: t("nav_image_models"),
-			icon: <ImageIcon className="h-4 w-4" />,
-		},
-		{
-			value: "vision-models",
-			label: t("nav_vision_models"),
-			icon: <Eye className="h-4 w-4" />,
-		},
+		// BYOK model config panels — hidden in cloud mode (system models are managed centrally)
+		...(!cloudMode
+			? [
+					{ value: "models", label: t("nav_agent_configs"), icon: <Bot className="h-4 w-4" /> },
+					{
+						value: "image-models",
+						label: t("nav_image_models"),
+						icon: <ImageIcon className="h-4 w-4" />,
+					},
+					{
+						value: "vision-models",
+						label: t("nav_vision_models"),
+						icon: <Eye className="h-4 w-4" />,
+					},
+			  ]
+			: []),
 		{ value: "team-roles", label: t("nav_team_roles"), icon: <UserKey className="h-4 w-4" /> },
 		{
 			value: "prompts",
@ -115,10 +123,13 @@ export function SearchSpaceSettingsDialog({ searchSpaceId }: SearchSpaceSettings

 	const content: Record<string, React.ReactNode> = {
 		general: <GeneralSettingsManager searchSpaceId={searchSpaceId} />,
-		models: <ModelConfigManager searchSpaceId={searchSpaceId} />,
+		// BYOK panels — only rendered in self-hosted mode
+		...(!cloudMode && {
+			models: <ModelConfigManager searchSpaceId={searchSpaceId} />,
+			"image-models": <ImageModelManager searchSpaceId={searchSpaceId} />,
+			"vision-models": <VisionModelManager searchSpaceId={searchSpaceId} />,
+		}),
 		roles: <LLMRoleManager searchSpaceId={searchSpaceId} />,
-		"image-models": <ImageModelManager searchSpaceId={searchSpaceId} />,
-		"vision-models": <VisionModelManager searchSpaceId={searchSpaceId} />,
 		"team-roles": <RolesManager searchSpaceId={searchSpaceId} />,
 		prompts: <PromptConfigManager searchSpaceId={searchSpaceId} />,
 		"team-memory": <TeamMemoryManager searchSpaceId={searchSpaceId} />,
--- a/surfsense_web/contracts/types/new-llm-config.types.ts
+++ b/surfsense_web/contracts/types/new-llm-config.types.ts
@ -166,6 +166,27 @@ export const globalNewLLMConfig = z.object({

 export const getGlobalNewLLMConfigsResponse = z.array(globalNewLLMConfig);

+// =============================================================================
+// System Model Catalog (cloud mode — backend-managed LLMs)
+// =============================================================================
+
+/**
+ * SystemModelItem — a backend-managed LLM exposed via GET /api/v1/models/system
+ * id is negative (e.g. -1, -2, …), distinct from user configs (positive) and Auto mode (0)
+ *
+ * NOTE(review): the schema only checks that `id` is a number; the sign
+ * convention described above is not enforced here.
+ */
+export const systemModelItem = z.object({
+	id: z.number(),
+	name: z.string(),
+	// May be omitted or null.
+	description: z.string().nullable().optional(),
+	provider: z.string(),
+	model_name: z.string(),
+	// Falls back to "free" when the field is absent from the response.
+	tier_required: z.string().default("free"),
+});
+
+/** Response schema for the system model list: an array of systemModelItem. */
+export const getSystemModelsResponse = z.array(systemModelItem);
+
+/** Parsed (output) shape of a single system model entry. */
+export type SystemModelItem = z.infer<typeof systemModelItem>;
+
 // =============================================================================
 // Image Generation Config (separate table from NewLLMConfig)
 // =============================================================================
--- a/surfsense_web/lib/apis/new-llm-config-api.service.ts
+++ b/surfsense_web/lib/apis/new-llm-config-api.service.ts
@ -15,6 +15,7 @@ import {
 	getNewLLMConfigResponse,
 	getNewLLMConfigsRequest,
 	getNewLLMConfigsResponse,
+	getSystemModelsResponse,
 	type UpdateLLMPreferencesRequest,
 	type UpdateNewLLMConfigRequest,
 	updateLLMPreferencesRequest,
@ -153,6 +154,14 @@ class NewLLMConfigApiService {
 		return baseApiService.get(`/api/v1/models`, getModelListResponse);
 	};

+	/**
+	 * Fetch the system-managed LLM catalogue (cloud mode only).
+	 * Issues GET /api/v1/models/system, passing getSystemModelsResponse as the
+	 * response schema; the backend-configured models carry negative IDs.
+	 */
+	getSystemModels = async () =>
+		baseApiService.get(`/api/v1/models/system`, getSystemModelsResponse);
+
 	/**
 	 * Update LLM preferences for a search space
 	 */
--- a/surfsense_web/lib/query-client/cache-keys.ts
+++ b/surfsense_web/lib/query-client/cache-keys.ts
@ -105,6 +105,9 @@ export const cacheKeys = {
 		all: () => ["prompts"] as const,
 		public: () => ["prompts", "public"] as const,
 	},
+	systemModels: {
+		all: () => ["models", "system"] as const,
+	},
 	notifications: {
 		search: (searchSpaceId: number | null, search: string, tab: string) =>
 			["notifications", "search", searchSpaceId, search, tab] as const,