feat: updated agent harness

This commit is contained in:
DESKTOP-RTLN3BA\$punk 2026-04-28 09:22:19 -07:00
parent 9ec9b64348
commit 31a372bb84
139 changed files with 12583 additions and 1111 deletions

View file

@ -0,0 +1,451 @@
"use client";
import { useMutation, useQuery, useQueryClient } from "@tanstack/react-query";
import { useAtomValue } from "jotai";
import { AlertTriangle, Check, Plus, ShieldCheck, Trash2, X } from "lucide-react";
import { useCallback, useMemo, useState } from "react";
import { toast } from "sonner";
import { agentFlagsAtom } from "@/atoms/agent/agent-flags-query.atom";
import { activeSearchSpaceIdAtom } from "@/atoms/search-spaces/search-space-query.atoms";
import { Alert, AlertDescription, AlertTitle } from "@/components/ui/alert";
import {
AlertDialog,
AlertDialogAction,
AlertDialogCancel,
AlertDialogContent,
AlertDialogDescription,
AlertDialogFooter,
AlertDialogHeader,
AlertDialogTitle,
} from "@/components/ui/alert-dialog";
import { Badge } from "@/components/ui/badge";
import { Button } from "@/components/ui/button";
import { Input } from "@/components/ui/input";
import { Label } from "@/components/ui/label";
import {
Select,
SelectContent,
SelectItem,
SelectTrigger,
SelectValue,
} from "@/components/ui/select";
import { Spinner } from "@/components/ui/spinner";
import {
type AgentPermissionAction,
type AgentPermissionRule,
type AgentPermissionRuleCreate,
agentPermissionsApiService,
} from "@/lib/apis/agent-permissions-api.service";
import { AppError } from "@/lib/error";
import { formatRelativeDate } from "@/lib/format-date";
import { cn } from "@/lib/utils";
/** One-line explanation of each action; rendered as helper text under the action select in the create form. */
const ACTION_DESCRIPTIONS: Record<AgentPermissionAction, string> = {
allow: "Always run without prompting",
deny: "Block silently",
ask: "Pause and ask for approval",
};
/**
 * Short label plus Tailwind classes for each action. The classes tint the per-rule
 * action select trigger (green = allow, destructive = deny, amber = ask).
 */
const ACTION_BADGE: Record<AgentPermissionAction, { label: string; className: string }> = {
allow: { label: "Allow", className: "bg-emerald-500/10 text-emerald-600 border-emerald-500/30" },
deny: { label: "Deny", className: "bg-destructive/10 text-destructive border-destructive/30" },
ask: { label: "Ask", className: "bg-amber-500/10 text-amber-600 border-amber-500/30" },
};
/**
 * Initial and reset state of the "new rule" form: no permission entered yet, match-all
 * pattern, "ask" action, and no user or thread restriction.
 */
const EMPTY_FORM: AgentPermissionRuleCreate = {
permission: "",
pattern: "*",
action: "ask",
// Null user_id and thread_id are displayed as the "Search space" scope by ScopeBadge.
user_id: null,
thread_id: null,
};
/**
 * Builds the React Query cache key under which the permission rules of one
 * search space are stored, so queries and invalidations agree on the key.
 *
 * @param searchSpaceId - Numeric id of the search space whose rules are cached.
 * @returns A readonly `["agent-permission-rules", searchSpaceId]` tuple.
 */
function permissionRulesQueryKey(searchSpaceId: number) {
  const key = ["agent-permission-rules", searchSpaceId] as const;
  return key;
}
/**
 * Small outline badge describing how narrowly a rule is scoped.
 *
 * A non-null `thread_id` takes precedence over a non-null `user_id`; when both
 * are null the rule is shown as applying to the whole search space.
 */
function ScopeBadge({ rule }: { rule: AgentPermissionRule }) {
  const { thread_id: threadId, user_id: userId } = rule;
  const isThreadScoped = threadId !== null;
  const isUserScoped = userId !== null;

  return (
    <Badge variant="outline" className="text-[10px]">
      {isThreadScoped ? <>Thread #{threadId}</> : isUserScoped ? "User-specific" : "Search space"}
    </Badge>
  );
}
/** Picks the message to toast for a failed mutation, using `fallback` when `err` is not an `Error`. */
function describeMutationError(err: unknown, fallback: string): string {
  return err instanceof Error ? err.message : fallback;
}

/**
 * Settings panel for the agent's allow / deny / ask permission rules of the
 * active search space.
 *
 * Render states, in order:
 * 1. spinner while the agent feature flags are still loading,
 * 2. "disabled" notice when `enable_permission` is off or `disable_new_agent_stack` is on,
 * 3. hint when no search space is active,
 * 4. spinner / error box while the rule list loads or fails,
 * 5. the rule list with an inline create form and a delete confirmation dialog.
 *
 * Every successful mutation invalidates the rules query so the list is refetched;
 * every failed mutation is reported through a toast from the mutation's `onError`.
 */
export function AgentPermissionsContent() {
  const searchSpaceIdRaw = useAtomValue(activeSearchSpaceIdAtom);
  const searchSpaceId = searchSpaceIdRaw ? Number(searchSpaceIdRaw) : null;
  const { data: flags, isLoading: flagsLoading } = useAtomValue(agentFlagsAtom);
  const featureEnabled = !!flags?.enable_permission && !flags?.disable_new_agent_stack;
  const queryClient = useQueryClient();

  const {
    data: rules,
    isLoading: rulesLoading,
    isError,
    error,
  } = useQuery({
    queryKey: searchSpaceId
      ? permissionRulesQueryKey(searchSpaceId)
      : ["agent-permission-rules", "none"],
    queryFn: () => agentPermissionsApiService.list(searchSpaceId as number),
    // Only fetch once we know which search space we are in and the feature is on.
    enabled: !!searchSpaceId && featureEnabled,
    staleTime: 60 * 1000,
  });

  // Mutations can only be triggered from UI that is rendered when searchSpaceId is set,
  // hence the `as number` narrowing below.
  const invalidateRules = () =>
    queryClient.invalidateQueries({
      queryKey: permissionRulesQueryKey(searchSpaceId as number),
    });

  const createMutation = useMutation({
    mutationFn: (payload: AgentPermissionRuleCreate) =>
      agentPermissionsApiService.create(searchSpaceId as number, payload),
    onSuccess: () => {
      toast.success("Rule created.");
      // Fire-and-forget: do not return the promise, so the mutation settles immediately.
      void invalidateRules();
    },
    onError: (err: unknown) => {
      toast.error(describeMutationError(err, "Failed to create rule."));
    },
  });

  const updateMutation = useMutation({
    mutationFn: (params: { ruleId: number; action: AgentPermissionAction; pattern?: string }) =>
      agentPermissionsApiService.update(searchSpaceId as number, params.ruleId, {
        action: params.action,
        pattern: params.pattern,
      }),
    onSuccess: () => {
      void invalidateRules();
    },
    onError: (err: unknown) => {
      toast.error(describeMutationError(err, "Failed to update rule."));
    },
  });

  const deleteMutation = useMutation({
    mutationFn: (ruleId: number) =>
      agentPermissionsApiService.remove(searchSpaceId as number, ruleId),
    onSuccess: () => {
      toast.success("Rule deleted.");
      void invalidateRules();
    },
    onError: (err: unknown) => {
      toast.error(describeMutationError(err, "Failed to delete rule."));
    },
  });

  const [showForm, setShowForm] = useState(false);
  const [formData, setFormData] = useState<AgentPermissionRuleCreate>(EMPTY_FORM);
  const [deleteTarget, setDeleteTarget] = useState<number | null>(null);

  // Rules are shown in the order the API returned them; memoised so the empty
  // fallback array keeps a stable identity between renders.
  const ruleList = useMemo(() => rules ?? [], [rules]);

  const handleCreate = useCallback(async () => {
    const permission = formData.permission.trim();
    if (!permission) {
      toast.error("Permission is required.");
      return;
    }
    try {
      await createMutation.mutateAsync({
        ...formData,
        permission,
        // A blank pattern means "match everything".
        pattern: formData.pattern.trim() || "*",
      });
      setShowForm(false);
      setFormData(EMPTY_FORM);
    } catch (err) {
      // The mutation's onError already toasted; keep the form open so the user can retry.
      if (!(err instanceof AppError)) {
        console.error("Unexpected error while creating permission rule", err);
      }
    }
  }, [createMutation, formData]);

  const handleConfirmDelete = useCallback(async () => {
    if (deleteTarget === null) return;
    try {
      await deleteMutation.mutateAsync(deleteTarget);
    } catch (err) {
      // This handler is invoked un-awaited from onClick, so a rejection must be
      // consumed here or it becomes an unhandled promise rejection. The toast
      // itself was already shown by the mutation's onError.
      if (!(err instanceof AppError)) {
        console.error("Unexpected error while deleting permission rule", err);
      }
    } finally {
      // Close the dialog whether the delete succeeded or failed.
      setDeleteTarget(null);
    }
  }, [deleteMutation, deleteTarget]);

  // Until the flags arrive `featureEnabled` is false; show a spinner instead of
  // flashing the "disabled" notice.
  if (flagsLoading) {
    return (
      <div className="flex items-center justify-center py-12">
        <Spinner className="size-6" />
      </div>
    );
  }

  if (!featureEnabled) {
    return (
      <Alert className="border-dashed">
        <ShieldCheck className="size-4" />
        <AlertTitle>Permission middleware is disabled</AlertTitle>
        <AlertDescription>
          Flip{" "}
          <code className="rounded bg-muted px-1 text-[10px]">SURFSENSE_ENABLE_PERMISSION</code> on
          the backend to manage allow/deny/ask rules from this panel.
        </AlertDescription>
      </Alert>
    );
  }

  if (!searchSpaceId) {
    return (
      <p className="text-sm text-muted-foreground">Open a search space to manage agent rules.</p>
    );
  }

  if (rulesLoading) {
    return (
      <div className="flex items-center justify-center py-12">
        <Spinner className="size-6" />
      </div>
    );
  }

  if (isError) {
    return (
      <div className="rounded-lg border border-dashed border-destructive/40 p-8 text-center">
        <AlertTriangle className="mx-auto size-8 text-destructive/60" />
        <p className="mt-2 text-sm text-destructive">Failed to load rules</p>
        <p className="text-xs text-muted-foreground">
          {error instanceof Error ? error.message : "Unknown error."}
        </p>
      </div>
    );
  }

  return (
    <div className="min-w-0 space-y-6 overflow-hidden">
      <div className="flex items-start justify-between gap-3">
        <div className="space-y-1">
          <p className="text-sm text-muted-foreground">
            Tell the agent which tools to allow, deny, or ask before running. Rules use wildcard
            patterns and are evaluated at the most specific scope first.
          </p>
        </div>
        {!showForm && (
          <Button
            size="sm"
            onClick={() => {
              setShowForm(true);
              setFormData(EMPTY_FORM);
            }}
            className="shrink-0 gap-1.5"
          >
            <Plus className="size-3.5" />
            New rule
          </Button>
        )}
      </div>

      {showForm && (
        <div className="rounded-lg border border-border/60 bg-card p-6">
          <div className="space-y-4">
            <h3 className="text-sm font-semibold tracking-tight">New permission rule</h3>
            <div className="grid grid-cols-2 gap-3">
              <div className="space-y-2">
                <Label htmlFor="permission-name">Permission</Label>
                <Input
                  id="permission-name"
                  value={formData.permission}
                  placeholder="e.g. tool:create_linear_issue or tool:*"
                  onChange={(e) => setFormData((p) => ({ ...p, permission: e.target.value }))}
                />
                <p className="text-[11px] text-muted-foreground">
                  Match a tool capability. Use <code className="font-mono">*</code> for wildcards.
                </p>
              </div>
              <div className="space-y-2">
                <Label htmlFor="pattern">Argument pattern</Label>
                <Input
                  id="pattern"
                  value={formData.pattern}
                  placeholder="*"
                  onChange={(e) => setFormData((p) => ({ ...p, pattern: e.target.value }))}
                />
                <p className="text-[11px] text-muted-foreground">
                  Wildcard against the canonical argument (e.g. <code>prod-*</code>).
                </p>
              </div>
            </div>
            <div className="space-y-2">
              {/* htmlFor + id tie the label to the select trigger for assistive tech. */}
              <Label htmlFor="permission-action">Action</Label>
              <Select
                value={formData.action}
                onValueChange={(value) =>
                  setFormData((p) => ({ ...p, action: value as AgentPermissionAction }))
                }
              >
                <SelectTrigger id="permission-action">
                  <SelectValue />
                </SelectTrigger>
                <SelectContent>
                  <SelectItem value="allow">Allow — run without asking</SelectItem>
                  <SelectItem value="ask">Ask — pause for approval</SelectItem>
                  <SelectItem value="deny">Deny — block silently</SelectItem>
                </SelectContent>
              </Select>
              <p className="text-[11px] text-muted-foreground">
                {ACTION_DESCRIPTIONS[formData.action]}
              </p>
            </div>
            <div className="flex items-center justify-end gap-2 pt-2">
              <Button
                variant="ghost"
                size="sm"
                onClick={() => {
                  setShowForm(false);
                  setFormData(EMPTY_FORM);
                }}
                disabled={createMutation.isPending}
              >
                Cancel
              </Button>
              <Button
                size="sm"
                onClick={handleCreate}
                disabled={createMutation.isPending || !formData.permission.trim()}
                className="relative"
              >
                {/* Keep the label in the layout (invisible) so the button width does not jump. */}
                <span className={createMutation.isPending ? "opacity-0" : ""}>Create</span>
                {createMutation.isPending && <Spinner className="absolute size-3.5" />}
              </Button>
            </div>
          </div>
        </div>
      )}

      {ruleList.length === 0 && !showForm && (
        <div className="rounded-lg border border-dashed border-border/60 p-8 text-center">
          <ShieldCheck className="mx-auto size-8 text-muted-foreground/40" />
          <p className="mt-2 text-sm text-muted-foreground">No rules yet</p>
          <p className="text-xs text-muted-foreground/60">
            Without rules the agent uses the deployment default for every tool.
          </p>
        </div>
      )}

      {ruleList.length > 0 && (
        <div className="space-y-2">
          {ruleList.map((rule) => {
            const badge = ACTION_BADGE[rule.action];
            // Only the row whose mutation is in flight gets disabled.
            const isUpdating =
              updateMutation.isPending && updateMutation.variables?.ruleId === rule.id;
            const isDeleting = deleteMutation.isPending && deleteMutation.variables === rule.id;
            return (
              <div
                key={rule.id}
                className="group flex flex-col gap-3 rounded-lg border border-border/60 bg-card p-4"
              >
                <div className="flex items-start justify-between gap-3">
                  <div className="flex min-w-0 flex-1 flex-col gap-1.5">
                    <div className="flex flex-wrap items-center gap-1.5">
                      <code className="truncate rounded bg-muted px-1.5 py-0.5 font-mono text-xs">
                        {rule.permission}
                      </code>
                      {rule.pattern !== "*" && (
                        <span className="text-xs text-muted-foreground">
                          <code className="font-mono">{rule.pattern}</code>
                        </span>
                      )}
                      <ScopeBadge rule={rule} />
                    </div>
                    <p className="text-[11px] text-muted-foreground">
                      Created {formatRelativeDate(rule.created_at)}
                    </p>
                  </div>
                  <div className="flex shrink-0 items-center gap-1">
                    <Select
                      value={rule.action}
                      onValueChange={(value) =>
                        updateMutation.mutate({
                          ruleId: rule.id,
                          action: value as AgentPermissionAction,
                        })
                      }
                      disabled={isUpdating || isDeleting}
                    >
                      <SelectTrigger
                        className={cn("h-8 gap-1 border px-2 text-[11px]", badge.className)}
                      >
                        <SelectValue>
                          <span className="flex items-center gap-1">
                            {rule.action === "allow" && <Check className="size-3" />}
                            {rule.action === "deny" && <X className="size-3" />}
                            {badge.label}
                          </span>
                        </SelectValue>
                      </SelectTrigger>
                      <SelectContent>
                        <SelectItem value="allow">Allow</SelectItem>
                        <SelectItem value="ask">Ask</SelectItem>
                        <SelectItem value="deny">Deny</SelectItem>
                      </SelectContent>
                    </Select>
                    <Button
                      size="sm"
                      variant="ghost"
                      className="size-8 p-0 text-muted-foreground hover:text-destructive"
                      onClick={() => setDeleteTarget(rule.id)}
                      disabled={isUpdating || isDeleting}
                      aria-label="Delete rule"
                    >
                      <Trash2 className="size-3.5" />
                    </Button>
                  </div>
                </div>
              </div>
            );
          })}
        </div>
      )}

      <AlertDialog
        open={deleteTarget !== null}
        onOpenChange={(open) => !open && setDeleteTarget(null)}
      >
        <AlertDialogContent>
          <AlertDialogHeader>
            <AlertDialogTitle>Delete this rule?</AlertDialogTitle>
            <AlertDialogDescription>
              The agent will fall back to deployment defaults for matching tool calls.
            </AlertDialogDescription>
          </AlertDialogHeader>
          <AlertDialogFooter>
            <AlertDialogCancel disabled={deleteMutation.isPending}>Cancel</AlertDialogCancel>
            <AlertDialogAction
              onClick={(e) => {
                // Keep the dialog open while the request runs; the handler closes it when done.
                e.preventDefault();
                void handleConfirmDelete();
              }}
              disabled={deleteMutation.isPending}
            >
              {deleteMutation.isPending ? "Deleting…" : "Delete"}
            </AlertDialogAction>
          </AlertDialogFooter>
        </AlertDialogContent>
      </AlertDialog>
    </div>
  );
}

View file

@ -0,0 +1,309 @@
"use client";
import { useAtomValue } from "jotai";
import { CircleCheck, CircleSlash, Cog, RotateCcw } from "lucide-react";
import { useMemo } from "react";
import { agentFlagsAtom } from "@/atoms/agent/agent-flags-query.atom";
import { Alert, AlertDescription, AlertTitle } from "@/components/ui/alert";
import { Badge } from "@/components/ui/badge";
import { Separator } from "@/components/ui/separator";
import { Skeleton } from "@/components/ui/skeleton";
import type { AgentFeatureFlags } from "@/lib/apis/agent-flags-api.service";
import { cn } from "@/lib/utils";
/** Name of any field on the backend-provided `AgentFeatureFlags` object. */
type FlagKey = keyof AgentFeatureFlags;
/** Display metadata for a single feature flag row. */
interface FlagDef {
/** Field of `AgentFeatureFlags` whose value this row shows. */
key: FlagKey;
/** Human-readable flag name. */
label: string;
/** One-sentence explanation shown under the label. */
description: string;
/** Environment variable name displayed next to the label. */
envVar: string;
}
/** A titled card grouping related flags. */
interface FlagGroup {
/** Stable identifier, used as the React key for the group. */
id: string;
/** Card heading. */
title: string;
/** Short summary shown under the heading. */
subtitle: string;
/** Flags listed in the card, in display order. */
flags: FlagDef[];
}
/**
 * Static catalogue of the feature flags shown on the agent status panel, grouped
 * into cards. Groups and flags are rendered in the order they appear here.
 *
 * NOTE(review): group ids jump from "tier2" to "tier4"; there is no "tier3" group
 * in this list — confirm that is intentional.
 */
const FLAG_GROUPS: FlagGroup[] = [
{
id: "tier1",
title: "Tier 1 — Agent quality",
subtitle: "Context editing, retries, fallbacks, doom-loop, tool-call repair.",
flags: [
{
key: "enable_context_editing",
label: "Context editing",
description: "Trim tool outputs and spill old text into backend storage.",
envVar: "SURFSENSE_ENABLE_CONTEXT_EDITING",
},
{
key: "enable_compaction_v2",
label: "Compaction v2",
description: "SurfSense-aware compaction replacing safe summarization.",
envVar: "SURFSENSE_ENABLE_COMPACTION_V2",
},
{
key: "enable_retry_after",
label: "Retry-After",
description: "Honour rate-limit retry-after headers automatically.",
envVar: "SURFSENSE_ENABLE_RETRY_AFTER",
},
{
key: "enable_model_fallback",
label: "Model fallback",
description: "Fail over to a backup model on persistent errors.",
envVar: "SURFSENSE_ENABLE_MODEL_FALLBACK",
},
{
key: "enable_model_call_limit",
label: "Model call limit",
description: "Cap total model calls per turn to prevent budget run-aways.",
envVar: "SURFSENSE_ENABLE_MODEL_CALL_LIMIT",
},
{
key: "enable_tool_call_limit",
label: "Tool call limit",
description: "Cap total tool calls per turn.",
envVar: "SURFSENSE_ENABLE_TOOL_CALL_LIMIT",
},
{
key: "enable_tool_call_repair",
label: "Tool-call name repair",
description: "Recover from lower-cased / fuzzy tool names emitted by smaller models.",
envVar: "SURFSENSE_ENABLE_TOOL_CALL_REPAIR",
},
{
key: "enable_doom_loop",
label: "Doom-loop detection",
description: "Detect repeated identical tool calls and ask the user to confirm.",
envVar: "SURFSENSE_ENABLE_DOOM_LOOP",
},
],
},
{
id: "tier2",
title: "Tier 2 — Safety",
subtitle: "Permission rules, busy-mutex, smarter tool selection.",
flags: [
{
key: "enable_permission",
label: "Permission middleware",
description: "Apply allow/deny/ask rules from the Agent Permissions tab.",
envVar: "SURFSENSE_ENABLE_PERMISSION",
},
{
key: "enable_busy_mutex",
label: "Busy mutex",
description: "Prevent two concurrent runs from corrupting the same thread.",
envVar: "SURFSENSE_ENABLE_BUSY_MUTEX",
},
{
key: "enable_llm_tool_selector",
label: "LLM tool selector",
description: "Use a smaller model to pre-filter the tool list per turn.",
envVar: "SURFSENSE_ENABLE_LLM_TOOL_SELECTOR",
},
],
},
{
id: "tier4",
title: "Tier 4 — Skills + subagents",
subtitle: "Built-in skills, specialized subagents, KB planner runnable.",
flags: [
{
key: "enable_skills",
label: "Skills",
description: "Load on-demand skill packs (kb-research, report-writing, …).",
envVar: "SURFSENSE_ENABLE_SKILLS",
},
{
key: "enable_specialized_subagents",
label: "Specialized subagents",
description: "Spin up explore / report_writer / connector_negotiator subagents.",
envVar: "SURFSENSE_ENABLE_SPECIALIZED_SUBAGENTS",
},
{
key: "enable_kb_planner_runnable",
label: "KB planner runnable",
description: "Compile a private planner sub-agent for KB search.",
envVar: "SURFSENSE_ENABLE_KB_PLANNER_RUNNABLE",
},
],
},
{
id: "tier5",
title: "Tier 5 — Audit + revert",
subtitle: "Action log + revert route used by the Agent Actions sheet.",
flags: [
{
key: "enable_action_log",
label: "Action log",
description: "Persist every tool call to agent_action_log.",
envVar: "SURFSENSE_ENABLE_ACTION_LOG",
},
{
key: "enable_revert_route",
label: "Revert route",
description: "Allow reverting reversible actions from the action log.",
envVar: "SURFSENSE_ENABLE_REVERT_ROUTE",
},
],
},
{
id: "tier6",
title: "Tier 6 — Plugins",
subtitle: "Optional middleware loaded from entry points.",
flags: [
{
key: "enable_plugin_loader",
label: "Plugin loader",
description: "Load surfsense.plugins entry-point middleware.",
envVar: "SURFSENSE_ENABLE_PLUGIN_LOADER",
},
],
},
{
// Not a numbered tier: telemetry is listed separately from the feature tiers.
id: "obs",
title: "Observability",
subtitle: "Telemetry pipelines (orthogonal to feature gating).",
flags: [
{
key: "enable_otel",
label: "OpenTelemetry",
description: "Emit OTel spans (also requires OTEL_EXPORTER_OTLP_ENDPOINT).",
envVar: "SURFSENSE_ENABLE_OTEL",
},
],
},
];
/**
 * One read-only row of the status panel: the flag's label, env var and
 * description on the left, and an "On" / "Off" badge on the right.
 */
function FlagRow({ def, value }: { def: FlagDef; value: boolean }) {
  const { label, envVar, description } = def;
  // Pick icon, caption and tint once instead of branching three times in the markup.
  const StateIcon = value ? CircleCheck : CircleSlash;
  const stateCaption = value ? "On" : "Off";
  const stateTone = value
    ? "border-emerald-500/30 bg-emerald-500/10 text-emerald-600"
    : "text-muted-foreground";

  return (
    <div className="flex items-start justify-between gap-4 py-3">
      <div className="flex min-w-0 flex-1 flex-col gap-1">
        <div className="flex flex-wrap items-center gap-2">
          <span className="text-sm font-medium">{label}</span>
          <code className="rounded bg-muted px-1.5 py-0.5 font-mono text-[10px] text-muted-foreground">
            {envVar}
          </code>
        </div>
        <p className="text-xs text-muted-foreground">{description}</p>
      </div>
      <Badge
        variant={value ? "default" : "secondary"}
        className={cn("shrink-0 gap-1", stateTone)}
      >
        <StateIcon className="size-3" />
        {stateCaption}
      </Badge>
    </div>
  );
}
/**
 * Read-only status panel listing the agent feature flags reported by
 * `agentFlagsAtom`, grouped according to `FLAG_GROUPS`.
 *
 * Shows skeletons while loading, a destructive alert with a Retry button when
 * the flags failed to load (or are missing), and otherwise a summary alert
 * followed by one card per flag group. When `disable_new_agent_stack` is set,
 * the summary is replaced by a kill-switch warning.
 */
export function AgentStatusContent() {
  const { data: flags, isLoading, isError, error, refetch } = useAtomValue(agentFlagsAtom);

  // Number of flags currently true, not counting the kill-switch itself.
  const enabledCount = useMemo(() => {
    if (!flags) return 0;
    return Object.entries(flags).filter(([k, v]) => k !== "disable_new_agent_stack" && v === true)
      .length;
  }, [flags]);

  if (isLoading) {
    return (
      <div className="flex flex-col gap-3">
        <Skeleton className="h-12 w-full rounded-md" />
        <Skeleton className="h-32 w-full rounded-md" />
        <Skeleton className="h-32 w-full rounded-md" />
      </div>
    );
  }

  if (isError || !flags) {
    return (
      <Alert variant="destructive">
        <AlertTitle>Failed to load agent status</AlertTitle>
        <AlertDescription className="flex items-center gap-2">
          {error instanceof Error ? error.message : "Unknown error."}
          <button
            type="button"
            onClick={() => refetch()}
            className="ml-auto inline-flex items-center gap-1 rounded-md border px-2 py-0.5 text-xs hover:bg-background"
          >
            <RotateCcw className="size-3" />
            Retry
          </button>
        </AlertDescription>
      </Alert>
    );
  }

  const masterOff = flags.disable_new_agent_stack;

  return (
    <div className="space-y-6">
      {masterOff ? (
        <Alert variant="destructive">
          <Cog className="size-4" />
          <AlertTitle>Master kill-switch is on</AlertTitle>
          <AlertDescription>
            <code className="rounded bg-muted px-1 text-[10px]">
              SURFSENSE_DISABLE_NEW_AGENT_STACK=true
            </code>
            {/* JSX drops whitespace that contains a newline, so the space must be explicit. */}
            {" "}
            forces every new middleware off, regardless of the individual flags below. Restart the
            backend after changing it.
          </AlertDescription>
        </Alert>
      ) : (
        <Alert>
          <Cog className="size-4" />
          <AlertTitle className="flex items-center gap-2">
            Agent stack
            <Badge variant="secondary" className="text-[10px]">
              {enabledCount} on
            </Badge>
          </AlertTitle>
          <AlertDescription>
            Read-only mirror of the backend's <code>AgentFeatureFlags</code>. Flip an env var and
            restart the backend to change a value.
          </AlertDescription>
        </Alert>
      )}
      {FLAG_GROUPS.map((group, groupIdx) => {
        // Drives the "all off" badge in the card header.
        const allOff = group.flags.every((f) => !flags[f.key]);
        return (
          <div key={group.id}>
            {groupIdx > 0 && <Separator className="my-4" />}
            <div className="rounded-lg border border-border/60 bg-card">
              <div className="flex items-start justify-between gap-3 border-b px-4 py-3">
                <div>
                  <p className="text-sm font-semibold">{group.title}</p>
                  <p className="text-xs text-muted-foreground">{group.subtitle}</p>
                </div>
                {allOff && (
                  <Badge variant="outline" className="text-[10px] text-muted-foreground">
                    all off
                  </Badge>
                )}
              </div>
              <div className="divide-y divide-border/50 px-4">
                {group.flags.map((def) => (
                  <FlagRow key={def.key} def={def} value={flags[def.key]} />
                ))}
              </div>
            </div>
          </div>
        );
      })}
    </div>
  );
}