feat(story-3.5): add cloud-mode LLM model selection with token quota enforcement

Implement system-managed model catalog, subscription tier enforcement,
atomic token quota tracking, and frontend cloud/self-hosted conditional
rendering. Apply all 20 BMAD code review patches including security
fixes (cross-user API key hijack), race condition mitigation (atomic SQL
UPDATE), and SSE mid-stream quota error handling.

Co-Authored-By: Claude Sonnet 4 <noreply@anthropic.com>
This commit is contained in:
Vonic 2026-04-14 17:01:21 +07:00
parent e7382b26de
commit c1776b3ec8
19 changed files with 1003 additions and 34 deletions

View file

@ -14,6 +14,8 @@ import { useCallback, useEffect, useMemo, useRef, useState } from "react";
import { toast } from "sonner";
import { z } from "zod";
import { disabledToolsAtom } from "@/atoms/agent-tools/agent-tools.atoms";
import { selectedSystemModelIdAtom } from "@/atoms/new-llm-config/system-models-query.atoms";
import { isCloud } from "@/lib/env-config";
import {
clearTargetCommentIdAtom,
currentThreadAtom,
@ -173,6 +175,16 @@ function extractMentionedDocuments(content: unknown): MentionedDocumentInfo[] {
return [];
}
/**
 * Error signalling that the user's token quota has been exhausted.
 *
 * Raised by the chat request code both for an HTTP 402 response and for a
 * mid-stream "error" event whose text mentions the quota, so the catch
 * handlers can recognise it with `instanceof` and show an upgrade prompt.
 */
class QuotaExceededError extends Error {
	constructor() {
		const message = "Token quota exceeded";
		super(message);
		// Set explicitly so logs and name-based checks do not see the generic "Error".
		this.name = "QuotaExceededError";
	}
}
/**
* Tools that should render custom UI in the chat.
*/
@ -230,6 +242,9 @@ export default function NewChatPage() {
// Get disabled tools from the tool toggle UI
const disabledTools = useAtomValue(disabledToolsAtom);
// Cloud mode: selected system model ID (null = backend default)
const selectedSystemModelId = useAtomValue(selectedSystemModelIdAtom);
// Get mentioned document IDs from the composer (derived from @ mentions + sidebar selections)
const mentionedDocumentIds = useAtomValue(mentionedDocumentIdsAtom);
const mentionedDocuments = useAtomValue(mentionedDocumentsAtom);
@ -704,11 +719,13 @@ export default function NewChatPage() {
? mentionedDocumentIds.surfsense_doc_ids
: undefined,
disabled_tools: disabledTools.length > 0 ? disabledTools : undefined,
...(isCloud() && selectedSystemModelId != null && { model_id: selectedSystemModelId }),
}),
signal: controller.signal,
});
if (!response.ok) {
if (response.status === 402) throw new QuotaExceededError();
throw new Error(`Backend error: ${response.status}`);
}
@ -847,6 +864,9 @@ export default function NewChatPage() {
}
case "error":
if (parsed.errorText?.includes("quota") || parsed.errorText?.includes("token_quota_exceeded")) {
throw new QuotaExceededError();
}
throw new Error(parsed.errorText || "Server error");
}
}
@ -909,6 +929,15 @@ export default function NewChatPage() {
}
return;
}
if (error instanceof QuotaExceededError) {
toast.error("Monthly token quota exceeded. Upgrade your plan to continue.", {
action: {
label: "Upgrade",
onClick: () => window.open("/pricing", "_blank"),
},
});
return;
}
console.error("[NewChatPage] Chat error:", error);
// Track chat error
@ -955,6 +984,7 @@ export default function NewChatPage() {
currentUser,
disabledTools,
updateChatTabTitle,
selectedSystemModelId,
]
);
@ -1062,11 +1092,13 @@ export default function NewChatPage() {
body: JSON.stringify({
search_space_id: searchSpaceId,
decisions,
...(isCloud() && selectedSystemModelId != null && { model_id: selectedSystemModelId }),
}),
signal: controller.signal,
});
if (!response.ok) {
if (response.status === 402) throw new QuotaExceededError();
throw new Error(`Backend error: ${response.status}`);
}
@ -1175,6 +1207,9 @@ export default function NewChatPage() {
}
case "error":
if (parsed.errorText?.includes("quota") || parsed.errorText?.includes("token_quota_exceeded")) {
throw new QuotaExceededError();
}
throw new Error(parsed.errorText || "Server error");
}
}
@ -1201,6 +1236,15 @@ export default function NewChatPage() {
if (error instanceof Error && error.name === "AbortError") {
return;
}
if (error instanceof QuotaExceededError) {
toast.error("Monthly token quota exceeded. Upgrade your plan to continue.", {
action: {
label: "Upgrade",
onClick: () => window.open("/pricing", "_blank"),
},
});
return;
}
console.error("[NewChatPage] Resume error:", error);
toast.error("Failed to resume. Please try again.");
} finally {
@ -1380,11 +1424,13 @@ export default function NewChatPage() {
search_space_id: searchSpaceId,
user_query: newUserQuery || null,
disabled_tools: disabledTools.length > 0 ? disabledTools : undefined,
...(isCloud() && selectedSystemModelId != null && { model_id: selectedSystemModelId }),
}),
signal: controller.signal,
});
if (!response.ok) {
if (response.status === 402) throw new QuotaExceededError();
throw new Error(`Backend error: ${response.status}`);
}
@ -1454,6 +1500,9 @@ export default function NewChatPage() {
}
case "error":
if (parsed.errorText?.includes("quota") || parsed.errorText?.includes("token_quota_exceeded")) {
throw new QuotaExceededError();
}
throw new Error(parsed.errorText || "Server error");
}
}
@ -1502,6 +1551,15 @@ export default function NewChatPage() {
return;
}
batcher.dispose();
if (error instanceof QuotaExceededError) {
toast.error("Monthly token quota exceeded. Upgrade your plan to continue.", {
action: {
label: "Upgrade",
onClick: () => window.open("/pricing", "_blank"),
},
});
return;
}
console.error("[NewChatPage] Regeneration error:", error);
trackChatError(
searchSpaceId,
@ -1524,7 +1582,7 @@ export default function NewChatPage() {
abortControllerRef.current = null;
}
},
[threadId, searchSpaceId, messages, disabledTools]
[threadId, searchSpaceId, messages, disabledTools, selectedSystemModelId]
);
// Handle editing a message - truncates history and regenerates with new query

View file

@ -0,0 +1,30 @@
import { atom } from "jotai";
import { atomWithQuery } from "jotai-tanstack-query";
import { newLLMConfigApiService } from "@/lib/apis/new-llm-config-api.service";
import { isCloud } from "@/lib/env-config";
import { cacheKeys } from "@/lib/query-client/cache-keys";
/**
 * Query atom for the system-managed LLM catalogue.
 *
 * The query is enabled only when isCloud() returns true, and the result is
 * considered fresh for ten minutes because the catalogue rarely changes.
 * The data is whatever newLLMConfigApiService.getSystemModels() resolves to.
 */
export const systemModelsAtom = atomWithQuery(() => ({
	queryKey: cacheKeys.systemModels.all(),
	queryFn: async () => newLLMConfigApiService.getSystemModels(),
	// Skip the request entirely outside cloud mode.
	enabled: isCloud(),
	// 10 minutes, expressed in milliseconds.
	staleTime: 10 * 60 * 1000,
}));
/**
 * Atom holding the currently selected system model ID (negative integer).
 * null means no explicit selection — the backend will use its default.
 *
 * NOTE: This is a global atom — it persists across search spaces within
 * a session. The ChatHeader component should reset it when needed.
 */
export const selectedSystemModelIdAtom = atom<number | null>(null);

View file

@ -1,6 +1,8 @@
"use client";
import { useCallback, useState } from "react";
import { useCallback, useEffect, useState } from "react";
import { useSetAtom } from "jotai";
import { selectedSystemModelIdAtom } from "@/atoms/new-llm-config/system-models-query.atoms";
import { ImageConfigDialog } from "@/components/shared/image-config-dialog";
import { ModelConfigDialog } from "@/components/shared/model-config-dialog";
import { VisionConfigDialog } from "@/components/shared/vision-config-dialog";
@ -12,7 +14,9 @@ import type {
NewLLMConfigPublic,
VisionLLMConfig,
} from "@/contracts/types/new-llm-config.types";
import { isCloud } from "@/lib/env-config";
import { ModelSelector } from "./model-selector";
import { SystemModelSelector } from "./system-model-selector";
interface ChatHeaderProps {
searchSpaceId: number;
@ -20,6 +24,12 @@ interface ChatHeaderProps {
}
export function ChatHeader({ searchSpaceId, className }: ChatHeaderProps) {
// Reset system model selection when search space changes
const setSelectedSystemModelId = useSetAtom(selectedSystemModelIdAtom);
useEffect(() => {
setSelectedSystemModelId(null);
}, [searchSpaceId, setSelectedSystemModelId]);
// LLM config dialog state
const [dialogOpen, setDialogOpen] = useState(false);
const [selectedConfig, setSelectedConfig] = useState<
@ -115,15 +125,19 @@ export function ChatHeader({ searchSpaceId, className }: ChatHeaderProps) {
return (
<div className="flex items-center gap-2">
<ModelSelector
onEditLLM={handleEditLLMConfig}
onAddNewLLM={handleAddNewLLM}
onEditImage={handleEditImageConfig}
onAddNewImage={handleAddImageModel}
onEditVision={handleEditVisionConfig}
onAddNewVision={handleAddVisionModel}
className={className}
/>
{isCloud() ? (
<SystemModelSelector className={className} />
) : (
<ModelSelector
onEditLLM={handleEditLLMConfig}
onAddNewLLM={handleAddNewLLM}
onEditImage={handleEditImageConfig}
onAddNewImage={handleAddImageModel}
onEditVision={handleEditVisionConfig}
onAddNewVision={handleAddVisionModel}
className={className}
/>
)}
<ModelConfigDialog
open={dialogOpen}
onOpenChange={handleDialogClose}

View file

@ -0,0 +1,148 @@
"use client";
import { useAtom, useAtomValue } from "jotai";
import { Bot, Check, ChevronDown, Crown, Zap } from "lucide-react";
import { useState } from "react";
import {
selectedSystemModelIdAtom,
systemModelsAtom,
} from "@/atoms/new-llm-config/system-models-query.atoms";
import { Badge } from "@/components/ui/badge";
import { Button } from "@/components/ui/button";
import {
Command,
CommandEmpty,
CommandGroup,
CommandInput,
CommandItem,
CommandList,
} from "@/components/ui/command";
import { Popover, PopoverContent, PopoverTrigger } from "@/components/ui/popover";
import { Spinner } from "@/components/ui/spinner";
import type { SystemModelItem } from "@/contracts/types/new-llm-config.types";
import { cn } from "@/lib/utils";
interface SystemModelSelectorProps {
className?: string;
}
/** How a single subscription tier is rendered as a badge. */
type TierPresentation = {
	label: string;
	icon: React.ComponentType<{ className?: string }>;
	variant: "default" | "secondary" | "outline";
};

/**
 * Badge presentation for the known tiers, keyed by lower-case tier name.
 * Tiers that are not listed here are handled by the fallback in TierBadge.
 */
const TIER_CONFIG: Record<string, TierPresentation> = {
	free: {
		label: "Free",
		icon: Zap,
		variant: "secondary",
	},
	pro: {
		label: "Pro",
		icon: Crown,
		variant: "default",
	},
	enterprise: {
		label: "Enterprise",
		icon: Crown,
		variant: "default",
	},
};
/**
 * Small badge showing which subscription tier a model requires.
 *
 * @param tier - Tier name as delivered by the API; matched case-insensitively
 *   against TIER_CONFIG. Unknown tiers render with the raw name, the Zap icon
 *   and the "outline" variant.
 */
function TierBadge({ tier }: { tier: string }) {
	const tierKey = tier.toLowerCase();
	// Only accept own keys: a plain-object lookup would also resolve inherited
	// names such as "constructor" or "toString", which are non-nullish and would
	// bypass the fallback, leaving `Icon` undefined at render time.
	const known = Object.prototype.hasOwnProperty.call(TIER_CONFIG, tierKey)
		? TIER_CONFIG[tierKey]
		: undefined;
	const config = known ?? { label: tier, icon: Zap, variant: "outline" as const };
	const Icon = config.icon;
	return (
		<Badge
			variant={config.variant}
			className="ml-auto flex items-center gap-1 text-[10px] px-1.5 py-0 h-4"
		>
			<Icon className="h-2.5 w-2.5" />
			{config.label}
		</Badge>
	);
}
/**
 * Cloud-mode model picker: a popover listing the system-managed LLM catalogue
 * with a search box and a tier badge per entry.
 *
 * The chosen model ID is written to selectedSystemModelIdAtom. While that atom
 * is null, the first catalogue entry is shown as the implied default, but the
 * atom itself is left untouched.
 *
 * @param className - Extra classes merged into the trigger button.
 */
export function SystemModelSelector({ className }: SystemModelSelectorProps) {
	const [open, setOpen] = useState(false);
	const [searchQuery, setSearchQuery] = useState("");
	const { data: models, isPending } = useAtomValue(systemModelsAtom);
	const [selectedId, setSelectedId] = useAtom(selectedSystemModelIdAtom);

	const selectedModel: SystemModelItem | undefined =
		selectedId != null ? models?.find((m) => m.id === selectedId) : undefined;

	// Display-only fallback: show the first model when nothing is selected (or the
	// selected ID is not in the catalogue); guard against an empty array.
	// NOTE(review): this does not set the atom, so callers reading the atom still
	// see null until the user picks — confirm the backend default matches the
	// first catalogue entry, otherwise the label shown here can be misleading.
	const displayModel = selectedModel ?? (models && models.length > 0 ? models[0] : undefined);

	// Lower-case the query once instead of once per field per model.
	const normalizedQuery = searchQuery.toLowerCase();
	const filteredModels =
		models?.filter(
			(m) =>
				!normalizedQuery ||
				m.name.toLowerCase().includes(normalizedQuery) ||
				m.provider.toLowerCase().includes(normalizedQuery) ||
				m.model_name.toLowerCase().includes(normalizedQuery)
		) ?? [];

	/** Persist the choice, then close the popover and clear the search box. */
	function handleSelect(model: SystemModelItem) {
		setSelectedId(model.id);
		setOpen(false);
		setSearchQuery("");
	}

	return (
		<Popover open={open} onOpenChange={setOpen}>
			<PopoverTrigger asChild>
				<Button
					variant="outline"
					size="sm"
					className={cn("flex items-center gap-2 h-8 px-3 text-sm font-normal", className)}
					aria-label="Select AI model"
				>
					<Bot className="h-4 w-4 shrink-0 text-muted-foreground" />
					{isPending ? (
						<Spinner className="h-3 w-3" />
					) : displayModel ? (
						<span className="max-w-[140px] truncate">{displayModel.name}</span>
					) : (
						<span className="text-muted-foreground">Select model</span>
					)}
					<ChevronDown className="h-3 w-3 shrink-0 text-muted-foreground ml-1" />
				</Button>
			</PopoverTrigger>
			<PopoverContent className="w-72 p-0" align="start">
				{/* Filtering is done above, so the Command component's own filter is disabled. */}
				<Command shouldFilter={false}>
					<CommandInput
						placeholder="Search models…"
						value={searchQuery}
						onValueChange={setSearchQuery}
					/>
					<CommandList className="max-h-64">
						{isPending ? (
							<div className="flex items-center justify-center py-6">
								<Spinner className="h-5 w-5" />
							</div>
						) : filteredModels.length === 0 ? (
							<CommandEmpty>No models found.</CommandEmpty>
						) : (
							<CommandGroup>
								{filteredModels.map((model) => {
									// Checked when explicitly selected, or when nothing is
									// selected and this is the implied default.
									const isSelected =
										selectedId === model.id ||
										(selectedId === null && displayModel?.id === model.id);
									return (
										<CommandItem
											key={model.id}
											value={String(model.id)}
											onSelect={() => handleSelect(model)}
											className="flex items-center gap-2 cursor-pointer"
										>
											<Check
												className={cn(
													"h-3.5 w-3.5 shrink-0",
													isSelected ? "opacity-100" : "opacity-0"
												)}
											/>
											<div className="flex flex-col flex-1 min-w-0">
												<span className="truncate font-medium text-sm">{model.name}</span>
												<span className="truncate text-[11px] text-muted-foreground">
													{model.model_name}
												</span>
											</div>
											<TierBadge tier={model.tier_required} />
										</CommandItem>
									);
								})}
							</CommandGroup>
						)}
					</CommandList>
				</Command>
			</PopoverContent>
		</Popover>
	);
}

View file

@ -17,6 +17,7 @@ import { useTranslations } from "next-intl";
import type React from "react";
import { searchSpaceSettingsDialogAtom } from "@/atoms/settings/settings-dialog.atoms";
import { SettingsDialog } from "@/components/settings/settings-dialog";
import { isCloud } from "@/lib/env-config";
const GeneralSettingsManager = dynamic(
() =>
@ -85,20 +86,27 @@ export function SearchSpaceSettingsDialog({ searchSpaceId }: SearchSpaceSettings
const t = useTranslations("searchSpaceSettings");
const [state, setState] = useAtom(searchSpaceSettingsDialogAtom);
const cloudMode = isCloud();
const navItems = [
{ value: "general", label: t("nav_general"), icon: <CircleUser className="h-4 w-4" /> },
{ value: "roles", label: t("nav_role_assignments"), icon: <ListChecks className="h-4 w-4" /> },
{ value: "models", label: t("nav_agent_configs"), icon: <Bot className="h-4 w-4" /> },
{
value: "image-models",
label: t("nav_image_models"),
icon: <ImageIcon className="h-4 w-4" />,
},
{
value: "vision-models",
label: t("nav_vision_models"),
icon: <Eye className="h-4 w-4" />,
},
// BYOK model config panels — hidden in cloud mode (system models are managed centrally)
...(!cloudMode
? [
{ value: "models", label: t("nav_agent_configs"), icon: <Bot className="h-4 w-4" /> },
{
value: "image-models",
label: t("nav_image_models"),
icon: <ImageIcon className="h-4 w-4" />,
},
{
value: "vision-models",
label: t("nav_vision_models"),
icon: <Eye className="h-4 w-4" />,
},
]
: []),
{ value: "team-roles", label: t("nav_team_roles"), icon: <UserKey className="h-4 w-4" /> },
{
value: "prompts",
@ -115,10 +123,13 @@ export function SearchSpaceSettingsDialog({ searchSpaceId }: SearchSpaceSettings
const content: Record<string, React.ReactNode> = {
general: <GeneralSettingsManager searchSpaceId={searchSpaceId} />,
models: <ModelConfigManager searchSpaceId={searchSpaceId} />,
// BYOK panels — only rendered in self-hosted mode
...(!cloudMode && {
models: <ModelConfigManager searchSpaceId={searchSpaceId} />,
"image-models": <ImageModelManager searchSpaceId={searchSpaceId} />,
"vision-models": <VisionModelManager searchSpaceId={searchSpaceId} />,
}),
roles: <LLMRoleManager searchSpaceId={searchSpaceId} />,
"image-models": <ImageModelManager searchSpaceId={searchSpaceId} />,
"vision-models": <VisionModelManager searchSpaceId={searchSpaceId} />,
"team-roles": <RolesManager searchSpaceId={searchSpaceId} />,
prompts: <PromptConfigManager searchSpaceId={searchSpaceId} />,
"team-memory": <TeamMemoryManager searchSpaceId={searchSpaceId} />,

View file

@ -166,6 +166,27 @@ export const globalNewLLMConfig = z.object({
export const getGlobalNewLLMConfigsResponse = z.array(globalNewLLMConfig);
// =============================================================================
// System Model Catalog (cloud mode — backend-managed LLMs)
// =============================================================================
/**
 * SystemModelItem — a backend-managed LLM exposed via GET /api/v1/models/system.
 * id is negative (e.g. -1, -2, …), distinct from user configs (positive) and Auto mode (0).
 * Note: the schema itself only requires id to be a number; the sign is not validated here.
 */
export const systemModelItem = z.object({
id: z.number(),
name: z.string(),
// May be a string, null, or absent from the payload.
description: z.string().nullable().optional(),
provider: z.string(),
model_name: z.string(),
// Falls back to "free" when the field is missing from the payload.
tier_required: z.string().default("free"),
});
// Response schema for the catalogue endpoint: an array of system model items.
export const getSystemModelsResponse = z.array(systemModelItem);
// Static type inferred from the schema (the parsed output shape).
export type SystemModelItem = z.infer<typeof systemModelItem>;
// =============================================================================
// Image Generation Config (separate table from NewLLMConfig)
// =============================================================================

View file

@ -15,6 +15,7 @@ import {
getNewLLMConfigResponse,
getNewLLMConfigsRequest,
getNewLLMConfigsResponse,
getSystemModelsResponse,
type UpdateLLMPreferencesRequest,
type UpdateNewLLMConfigRequest,
updateLLMPreferencesRequest,
@ -153,6 +154,14 @@ class NewLLMConfigApiService {
return baseApiService.get(`/api/v1/models`, getModelListResponse);
};
/**
 * Fetch the system-managed LLM catalogue (cloud mode only).
 * Requests /api/v1/models/system through baseApiService, passing
 * getSystemModelsResponse as the response schema.
 */
getSystemModels = async () =>
	baseApiService.get(`/api/v1/models/system`, getSystemModelsResponse);
/**
* Update LLM preferences for a search space
*/

View file

@ -105,6 +105,9 @@ export const cacheKeys = {
all: () => ["prompts"] as const,
public: () => ["prompts", "public"] as const,
},
systemModels: {
all: () => ["models", "system"] as const,
},
notifications: {
search: (searchSpaceId: number | null, search: string, tab: string) =>
["notifications", "search", searchSpaceId, search, tab] as const,