From ea3c606b28954cfbf31ad5bdb002945bf34e8359 Mon Sep 17 00:00:00 2001
From: Andrey Avtomonov
Date: Fri, 15 May 2026 16:33:18 +0200
Subject: [PATCH] fix: close claude-code runtime isolation checks

---
Reviewer notes (commentary only; text between the `---` separator and the
first file diff is not part of the commit):

* This copy was rebuilt from a whitespace-collapsed version of the patch.
  Added/removed line boundaries follow the original +/- markers and match the
  hunk headers and diffstat; indentation is reconstructed (2 spaces) and
  should be confirmed against the repository before applying.
* Generic type arguments (`Set<string>`, `Promise<void>`,
  `Promise<SDKResultMessage>`) were missing from the collapsed copy,
  apparently stripped as markup, and are restored here by inference from how
  the values are used. TODO confirm against the target file.
* The author address is missing from the `From:` header and could not be
  recovered.
* NOTE(review): `expect(env).not.toEqual(expect.objectContaining({ A, B }))`
  only fails when BOTH keys are present, so a single leaked credential would
  still pass. Consider one `not.toHaveProperty(key)` assertion per key once
  it is confirmed that every listed variable is meant to be scrubbed.
* NOTE(review): `assertInitIsolation` treats every entry of
  `message.mcp_servers` as active without looking at `status`; presumably a
  `ktx` server that failed to connect would still satisfy the "missing"
  check. Verify whether that is intended.

 .../src/llm/claude-code-runtime.test.ts       | 127 ++++++++++++++++++
 .../context/src/llm/claude-code-runtime.ts    |  32 ++++-
 2 files changed, 154 insertions(+), 5 deletions(-)

diff --git a/packages/context/src/llm/claude-code-runtime.test.ts b/packages/context/src/llm/claude-code-runtime.test.ts
index f776c988..889ab31d 100644
--- a/packages/context/src/llm/claude-code-runtime.test.ts
+++ b/packages/context/src/llm/claude-code-runtime.test.ts
@@ -155,6 +155,133 @@ describe('ClaudeCodeKtxLlmRuntime', () => {
     expect(onStepFinish).toHaveBeenCalledWith({ stepIndex: 1, stepBudget: 1 });
   });
 
+  it('rejects settings-derived agents and non-KTX MCP servers from init messages', async () => {
+    const query = vi.fn((_input: any) =>
+      stream([
+        initMessage({
+          agents: ['project-agent'],
+          mcp_servers: [{ name: 'filesystem', status: 'connected' }],
+        }),
+        resultMessage({ result: 'hello' }),
+      ]),
+    );
+    const runtime = new ClaudeCodeKtxLlmRuntime({
+      projectDir: '/tmp/project',
+      modelSlots: { default: 'sonnet' },
+      query,
+      env: {},
+    });
+
+    await expect(runtime.generateText({ role: 'default', prompt: 'say hello' })).rejects.toThrow(
+      /Claude Code runtime isolation failed: .*mcp_servers=filesystem.*agents=project-agent/,
+    );
+  });
+
+  it('passes scrubbed env to object generation and agent loops', async () => {
+    const schema = z.object({ answer: z.string() });
+    const objectQuery = vi.fn((_input: any) =>
+      stream([initMessage(), resultMessage({ structured_output: { answer: 'yes' } })]),
+    );
+    const objectRuntime = new ClaudeCodeKtxLlmRuntime({
+      projectDir: '/tmp/project',
+      modelSlots: { default: 'sonnet' },
+      query: objectQuery,
+      env: { ANTHROPIC_API_KEY: 'sk-ant-test', AWS_PROFILE: 'prod', PATH: '/usr/bin' },
+    });
+
+    await expect(objectRuntime.generateObject({ role: 'default', prompt: 'json', schema })).resolves.toEqual({
+      answer: 'yes',
+    });
+    expect(objectQuery.mock.calls[0][0].options.env).toEqual(expect.objectContaining({ PATH: '/usr/bin' }));
+    expect(objectQuery.mock.calls[0][0].options.env).not.toEqual(
+      expect.objectContaining({ ANTHROPIC_API_KEY: 'sk-ant-test', AWS_PROFILE: 'prod' }),
+    );
+
+    const agentQuery = vi.fn((_input: any) =>
+      stream([
+        initMessage({ tools: ['mcp__ktx__load_skill'], mcp_servers: [{ name: 'ktx', status: 'connected' }] }),
+        {
+          type: 'assistant',
+          message: { role: 'assistant', content: [] },
+          parent_tool_use_id: null,
+          uuid: '00000000-0000-4000-8000-000000000004',
+          session_id: 'session-id',
+        } as unknown as SDKMessage,
+        resultMessage({ subtype: 'error_max_turns', is_error: true }),
+      ]),
+    );
+    const agentRuntime = new ClaudeCodeKtxLlmRuntime({
+      projectDir: '/tmp/project',
+      modelSlots: { default: 'sonnet' },
+      query: agentQuery,
+      env: { ANTHROPIC_AUTH_TOKEN: 'token', CLAUDE_CODE_USE_VERTEX: '1', HOME: '/Users/test' },
+    });
+
+    await agentRuntime.runAgentLoop({
+      modelRole: 'default',
+      systemPrompt: 'system',
+      userPrompt: 'user',
+      toolSet: {
+        load_skill: {
+          name: 'load_skill',
+          description: 'Load skill.',
+          inputSchema: z.object({ name: z.string() }),
+          execute: async () => ({ markdown: 'loaded' }),
+        },
+      },
+      stepBudget: 1,
+      telemetryTags: { operationName: 'test' },
+    });
+    expect(agentQuery.mock.calls[0][0].options.env).toEqual(expect.objectContaining({ HOME: '/Users/test' }));
+    expect(agentQuery.mock.calls[0][0].options.env).not.toEqual(
+      expect.objectContaining({ ANTHROPIC_AUTH_TOKEN: 'token', CLAUDE_CODE_USE_VERTEX: '1' }),
+    );
+  });
+
+  it('logs and ignores onStepFinish callback errors', async () => {
+    const query = vi.fn((_input: any) =>
+      stream([
+        initMessage(),
+        {
+          type: 'assistant',
+          message: { role: 'assistant', content: [] },
+          parent_tool_use_id: null,
+          uuid: '00000000-0000-4000-8000-000000000005',
+          session_id: 'session-id',
+        } as unknown as SDKMessage,
+        resultMessage({ subtype: 'success', terminal_reason: 'completed' }),
+      ]),
+    );
+    const logger = {
+      debug: vi.fn(),
+      log: vi.fn(),
+      warn: vi.fn(),
+      error: vi.fn(),
+    };
+    const runtime = new ClaudeCodeKtxLlmRuntime({
+      projectDir: '/tmp/project',
+      modelSlots: { default: 'sonnet' },
+      query,
+      env: {},
+      logger,
+    });
+
+    await expect(
+      runtime.runAgentLoop({
+        modelRole: 'default',
+        systemPrompt: 'system',
+        userPrompt: 'user',
+        toolSet: {},
+        stepBudget: 1,
+        telemetryTags: { operationName: 'test' },
+        onStepFinish: async () => {
+          throw new Error('callback exploded');
+        },
+      }),
+    ).resolves.toEqual({ stopReason: 'natural' });
+    expect(logger.warn).toHaveBeenCalledWith(expect.stringContaining('callback exploded'));
+  });
+
   it('maps max-turn terminal reasons to budget', () => {
     expect(mapClaudeCodeStopReason(resultMessage({ subtype: 'error_max_turns' }))).toBe('budget');
     expect(mapClaudeCodeStopReason(resultMessage({ terminal_reason: 'max_turns' }))).toBe('budget');
diff --git a/packages/context/src/llm/claude-code-runtime.ts b/packages/context/src/llm/claude-code-runtime.ts
index f45d81c5..be661a5c 100644
--- a/packages/context/src/llm/claude-code-runtime.ts
+++ b/packages/context/src/llm/claude-code-runtime.ts
@@ -78,25 +78,42 @@ function modelForRole(modelSlots: ClaudeCodeKtxLlmRuntimeDeps['modelSlots'], rol
   return resolveClaudeCodeModel(modelSlots[role] ?? modelSlots.default);
 }
 
-function assertInitIsolation(message: SDKMessage, allowedToolIds: Set<string>): void {
+function assertInitIsolation(
+  message: SDKMessage,
+  allowedToolIds: Set<string>,
+  expectedMcpServerNames: Set<string>,
+): void {
   if (message.type !== 'system' || message.subtype !== 'init') {
     return;
   }
   const unexpectedTools = message.tools.filter((toolName) => !allowedToolIds.has(toolName));
+  const activeMcpServerNames = message.mcp_servers.map((server) => server.name);
+  const unexpectedMcpServers = activeMcpServerNames.filter((name) => !expectedMcpServerNames.has(name));
+  const missingMcpServers = [...expectedMcpServerNames].filter((name) => !activeMcpServerNames.includes(name));
+  const unexpectedAgents = message.agents ?? [];
   if (
     unexpectedTools.length > 0 ||
+    unexpectedMcpServers.length > 0 ||
+    missingMcpServers.length > 0 ||
     message.slash_commands.length > 0 ||
     message.skills.length > 0 ||
-    message.plugins.length > 0
+    message.plugins.length > 0 ||
+    unexpectedAgents.length > 0
   ) {
     throw new Error(
-      `Claude Code runtime isolation failed: tools=${unexpectedTools.join(',') || '(none)'} slash_commands=${
+      `Claude Code runtime isolation failed: tools=${unexpectedTools.join(',') || '(none)'} mcp_servers=${
+        unexpectedMcpServers.join(',') || '(none)'
+      } missing_mcp_servers=${missingMcpServers.join(',') || '(none)'} slash_commands=${
         message.slash_commands.length
-      } skills=${message.skills.length} plugins=${message.plugins.length}`,
+      } skills=${message.skills.length} plugins=${message.plugins.length} agents=${unexpectedAgents.join(',') || '(none)'}`,
     );
   }
 }
 
+function expectedMcpServerNames(tools: KtxRuntimeToolSet | undefined): Set<string> {
+  return tools && Object.keys(tools).length > 0 ? new Set(['ktx']) : new Set();
+}
+
 function baseOptions(input: {
   projectDir: string;
   model: string;
@@ -138,11 +155,12 @@ async function collectResult(params: {
   prompt: string;
   options: Options;
   allowedToolIds: Set<string>;
+  expectedMcpServerNames: Set<string>;
   onAssistantTurn?: () => Promise<void>;
 }): Promise<SDKResultMessage> {
   let result: SDKResultMessage | undefined;
   for await (const message of params.query({ prompt: params.prompt, options: params.options })) {
-    assertInitIsolation(message, params.allowedToolIds);
+    assertInitIsolation(message, params.allowedToolIds, params.expectedMcpServerNames);
     if (message.type === 'assistant' && message.parent_tool_use_id === null) {
       await params.onAssistantTurn?.();
     }
@@ -178,6 +196,7 @@ export class ClaudeCodeKtxLlmRuntime implements KtxLlmRuntimePort {
       prompt: [input.system, input.prompt].filter(Boolean).join('\n\n'),
       options,
       allowedToolIds: new Set(mcpToolIds(input.tools ?? {})),
+      expectedMcpServerNames: expectedMcpServerNames(input.tools),
     });
     const error = resultError(result);
     if (error) {
@@ -207,6 +226,7 @@ export class ClaudeCodeKtxLlmRuntime implements KtxLlmRuntimePort {
       prompt: [input.system, input.prompt].filter(Boolean).join('\n\n'),
       options,
       allowedToolIds: new Set(mcpToolIds(input.tools ?? {})),
+      expectedMcpServerNames: expectedMcpServerNames(input.tools),
     });
     const error = resultError(result);
     if (error) {
@@ -233,6 +253,7 @@ export class ClaudeCodeKtxLlmRuntime implements KtxLlmRuntimePort {
       prompt: params.userPrompt,
       options: { ...options, systemPrompt: params.systemPrompt },
       allowedToolIds: new Set(mcpToolIds(params.toolSet)),
+      expectedMcpServerNames: expectedMcpServerNames(params.toolSet),
       onAssistantTurn: async () => {
         stepIndex += 1;
         if (!params.onStepFinish) {
@@ -277,6 +298,7 @@ export async function runClaudeCodeAuthProbe(input: {
     prompt: 'Reply with exactly: ok',
     options,
     allowedToolIds: new Set(),
+    expectedMcpServerNames: new Set(),
   });
   const error = resultError(result);
   if (error) {