From 71fde812b9d94a9a06d48f76852a534a08d99bd5 Mon Sep 17 00:00:00 2001
From: Andrey Avtomonov
Date: Fri, 15 May 2026 16:06:04 +0200
Subject: [PATCH] feat: add claude-code llm runtime

---
Review note: the blob ids on the `index` lines below were not regenerated after the review edits in this patch.

 .../context/src/llm/claude-code-env.test.ts   |  19 ++
 packages/context/src/llm/claude-code-env.ts   |  25 ++
 .../src/llm/claude-code-models.test.ts        |  21 ++
 .../context/src/llm/claude-code-models.ts     |  27 ++
 .../src/llm/claude-code-runtime.test.ts       | 182 +++++++++++
 .../context/src/llm/claude-code-runtime.ts    | 301 ++++++++++++++++++
 packages/context/src/llm/index.ts             |   3 +
 7 files changed, 578 insertions(+)
 create mode 100644 packages/context/src/llm/claude-code-env.test.ts
 create mode 100644 packages/context/src/llm/claude-code-env.ts
 create mode 100644 packages/context/src/llm/claude-code-models.test.ts
 create mode 100644 packages/context/src/llm/claude-code-models.ts
 create mode 100644 packages/context/src/llm/claude-code-runtime.test.ts
 create mode 100644 packages/context/src/llm/claude-code-runtime.ts

diff --git a/packages/context/src/llm/claude-code-env.test.ts b/packages/context/src/llm/claude-code-env.test.ts
new file mode 100644
index 00000000..19cbd1ff
--- /dev/null
+++ b/packages/context/src/llm/claude-code-env.test.ts
@@ -0,0 +1,19 @@
+import { describe, expect, it } from 'vitest';
+import { CLAUDE_CODE_PROVIDER_ENV_DENYLIST, createKtxClaudeCodeEnv } from './claude-code-env.js';
+
+describe('createKtxClaudeCodeEnv', () => {
+  it('strips provider-routing credentials from the Claude Code child environment', () => {
+    const seeded = Object.fromEntries(CLAUDE_CODE_PROVIDER_ENV_DENYLIST.map((key) => [key, `${key}-value`]));
+    const env = createKtxClaudeCodeEnv({
+      ...seeded,
+      PATH: '/usr/bin',
+      HOME: '/Users/test',
+    });
+
+    for (const key of CLAUDE_CODE_PROVIDER_ENV_DENYLIST) {
+      expect(env).not.toHaveProperty(key);
+    }
+    expect(env.PATH).toBe('/usr/bin');
+    expect(env.HOME).toBe('/Users/test');
+  });
+});
diff --git a/packages/context/src/llm/claude-code-env.ts
b/packages/context/src/llm/claude-code-env.ts
new file mode 100644
index 00000000..285113e4
--- /dev/null
+++ b/packages/context/src/llm/claude-code-env.ts
@@ -0,0 +1,25 @@
+/** Provider-routing and credential variables that are removed from the Claude Code child environment. */
+export const CLAUDE_CODE_PROVIDER_ENV_DENYLIST = [
+  'ANTHROPIC_API_KEY',
+  'ANTHROPIC_AUTH_TOKEN',
+  'ANTHROPIC_BASE_URL',
+  'ANTHROPIC_MODEL',
+  'ANTHROPIC_VERTEX_PROJECT_ID',
+  'CLOUD_ML_REGION',
+  'GOOGLE_APPLICATION_CREDENTIALS',
+  'GOOGLE_CLOUD_PROJECT',
+  'AWS_ACCESS_KEY_ID',
+  'AWS_SECRET_ACCESS_KEY',
+  'AWS_SESSION_TOKEN',
+  'AWS_REGION',
+  'AWS_PROFILE',
+  'CLAUDE_CODE_USE_BEDROCK',
+  'CLAUDE_CODE_USE_VERTEX',
+] as const;
+
+const DENYLIST = new Set<string>(CLAUDE_CODE_PROVIDER_ENV_DENYLIST);
+
+/** Returns a copy of `env` (default: `process.env`) without any variable named in the denylist. */
+export function createKtxClaudeCodeEnv(env: NodeJS.ProcessEnv = process.env): Record<string, string | undefined> {
+  return Object.fromEntries(Object.entries(env).filter(([key]) => !DENYLIST.has(key)));
+}
diff --git a/packages/context/src/llm/claude-code-models.test.ts b/packages/context/src/llm/claude-code-models.test.ts
new file mode 100644
index 00000000..482e6af8
--- /dev/null
+++ b/packages/context/src/llm/claude-code-models.test.ts
@@ -0,0 +1,21 @@
+import { describe, expect, it } from 'vitest';
+import { resolveClaudeCodeModel } from './claude-code-models.js';
+
+describe('resolveClaudeCodeModel', () => {
+  it.each([
+    ['sonnet', 'claude-sonnet-4-6'],
+    ['opus', 'claude-opus-4-7'],
+    ['haiku', 'claude-haiku-4-5'],
+    ['claude-sonnet-4-6', 'claude-sonnet-4-6'],
+  ])('maps %s to %s', (input, expected) => {
+    expect(resolveClaudeCodeModel(input)).toBe(expected);
+  });
+
+  it('rejects unsupported aliases', () => {
+    expect(() => resolveClaudeCodeModel('gpt-5')).toThrow('Unsupported Claude Code model');
+  });
+
+  it('rejects names inherited from Object.prototype', () => {
+    expect(() => resolveClaudeCodeModel('constructor')).toThrow('Unsupported Claude Code model');
+  });
+});
diff --git a/packages/context/src/llm/claude-code-models.ts b/packages/context/src/llm/claude-code-models.ts
new file mode 100644
index 00000000..7676409b
--- /dev/null
+++ b/packages/context/src/llm/claude-code-models.ts
@@ -0,0 +1,27 @@
+const CLAUDE_CODE_MODEL_ALIASES: Record<string, string> = {
+  sonnet: 'claude-sonnet-4-6',
+  opus: 'claude-opus-4-7',
+  haiku: 'claude-haiku-4-5',
+};
+
+const FULL_MODEL_ID = /^claude-(sonnet|opus|haiku)-[0-9]+-[0-9]+$/;
+
+/**
+ * Resolves a short alias or a full `claude-<family>-<major>-<minor>` id to the model id handed to Claude Code.
+ *
+ * @throws Error when the trimmed name is neither a known alias nor a full model id.
+ */
+export function resolveClaudeCodeModel(model: string): string {
+  const normalized = model.trim();
+  // Own-property check: a bare index lookup would also return inherited members such as "constructor".
+  const alias = Object.prototype.hasOwnProperty.call(CLAUDE_CODE_MODEL_ALIASES, normalized)
+    ? CLAUDE_CODE_MODEL_ALIASES[normalized]
+    : undefined;
+  if (alias) {
+    return alias;
+  }
+  if (FULL_MODEL_ID.test(normalized)) {
+    return normalized;
+  }
+  throw new Error(`Unsupported Claude Code model "${model}". Use sonnet, opus, haiku, or a claude-* model id.`);
+}
diff --git a/packages/context/src/llm/claude-code-runtime.test.ts b/packages/context/src/llm/claude-code-runtime.test.ts
new file mode 100644
index 00000000..23c7d550
--- /dev/null
+++ b/packages/context/src/llm/claude-code-runtime.test.ts
@@ -0,0 +1,182 @@
+import { describe, expect, it, vi } from 'vitest';
+import { z } from 'zod';
+import type { SDKMessage } from '@anthropic-ai/claude-agent-sdk';
+import { ClaudeCodeKtxLlmRuntime, mapClaudeCodeStopReason, runClaudeCodeAuthProbe } from './claude-code-runtime.js';
+
+async function* stream(messages: SDKMessage[]): AsyncGenerator<SDKMessage> {
+  for (const message of messages) {
+    yield message;
+  }
+}
+
+function initMessage(overrides: Partial<Extract<SDKMessage, { type: 'system'; subtype: 'init' }>> = {}): Extract<
+  SDKMessage,
+  { type: 'system'; subtype: 'init' }
+> {
+  return {
+    type: 'system',
+    subtype: 'init',
+    apiKeySource: 'none',
+    claude_code_version: '0.3.142',
+    cwd: '/tmp/project',
+    tools: [],
+    mcp_servers: [],
+    model: 'claude-sonnet-4-6',
+    permissionMode: 'dontAsk',
+    slash_commands: [],
+    output_style: 'default',
+    skills: [],
+    plugins: [],
+    uuid: 'init-id',
+    session_id: 'session-id',
+    ...overrides,
+  };
+}
+
+function resultMessage(overrides: Partial<Extract<SDKMessage, { type: 'result' }>> = {}): Extract<
+  SDKMessage,
+  { type: 'result' }
+> {
+  return {
+    type: 'result',
+    subtype: 'success',
+    duration_ms: 1,
+    duration_api_ms: 1,
+    is_error: false,
+    num_turns: 1,
+    result: 'ok',
+    stop_reason: null,
+    total_cost_usd: 0,
+    usage: {} as never,
+    modelUsage: {},
+    permission_denials: [],
+    errors: [],
+    uuid: 'result-id',
+    session_id: 'session-id',
+    ...overrides,
+  } as Extract<SDKMessage, { type: 'result' }>;
+}
+
+describe('ClaudeCodeKtxLlmRuntime', () => {
+  it('passes isolation options and scrubbed env to text generation', async () => {
+    const query = vi.fn(() => stream([initMessage(), resultMessage({ result: 'hello' })]));
+    const runtime = new ClaudeCodeKtxLlmRuntime({
+      projectDir: '/tmp/project',
+      modelSlots: { default: 'sonnet' },
+      query,
+      env: { ANTHROPIC_API_KEY: 'sk-ant-test', PATH: '/usr/bin' },
+    });
+
+    await expect(runtime.generateText({ role: 'default', prompt: 'say hello' })).resolves.toBe('hello');
+    expect(query).toHaveBeenCalledWith({
+      prompt: 'say hello',
+      options: expect.objectContaining({
+        cwd: '/tmp/project',
+        model: 'claude-sonnet-4-6',
+        maxTurns: 1,
+        settingSources: [],
+        skills: [],
+        plugins: [],
+        tools: [],
+        allowedTools: [],
+        permissionMode: 'dontAsk',
+        persistSession: false,
+        env: expect.not.objectContaining({ ANTHROPIC_API_KEY: 'sk-ant-test' }),
+      }),
+    });
+  });
+
+  it('validates structured output with the caller schema', async () => {
+    const schema = z.object({ answer: z.string() });
+    const query = vi.fn(() => stream([initMessage(), resultMessage({ structured_output: { answer: 'yes' } })]));
+    const runtime = new ClaudeCodeKtxLlmRuntime({
+      projectDir: '/tmp/project',
+      modelSlots: { default: 'sonnet' },
+      query,
+      env: {},
+    });
+
+    await expect(runtime.generateObject({ role: 'default', prompt: 'json', schema })).resolves.toEqual({ answer: 'yes' });
+    expect(query.mock.calls[0][0].options.outputFormat).toMatchObject({
+      type: 'json_schema',
+      schema: expect.objectContaining({ type: 'object' }),
+    });
+  });
+
+  it('registers only exact KTX MCP tool ids and denies non-KTX tools', async () => {
+    const query = vi.fn(() =>
+      stream([
+        initMessage({ tools: ['mcp__ktx__load_skill'], mcp_servers: [{ name: 'ktx', status: 'connected' }] }),
+        {
+          type: 'assistant',
+          message: { role: 'assistant', content: [] },
+          parent_tool_use_id: null,
+          uuid: 'assistant-1',
+          session_id: 'session-id',
+        } as SDKMessage,
+        resultMessage({ subtype: 'error_max_turns', is_error: true }),
+      ]),
+    );
+    const runtime = new ClaudeCodeKtxLlmRuntime({
+      projectDir: '/tmp/project',
+      modelSlots: { default: 'sonnet' },
+      query,
+      env: {},
+    });
+    const onStepFinish = vi.fn();
+
+    await runtime.runAgentLoop({
+      modelRole: 'default',
+      systemPrompt: 'system',
+      userPrompt: 'user',
+      toolSet: {
+        load_skill: {
+          name: 'load_skill',
+          description: 'Load skill.',
+          inputSchema: z.object({ name: z.string() }),
+          execute: async () => ({ markdown: 'loaded' }),
+        },
+      },
+      stepBudget: 1,
+      telemetryTags: { operationName: 'test' },
+      onStepFinish,
+    });
+
+    const options = query.mock.calls[0][0].options;
+    expect(options.allowedTools).toEqual(['mcp__ktx__load_skill']);
+    expect(await options.canUseTool('mcp__ktx__load_skill', {}, { signal: new AbortController().signal, toolUseID: '1' })).toEqual({
+      behavior: 'allow',
+      toolUseID: '1',
+    });
+    expect(await options.canUseTool('Bash', {}, { signal: new AbortController().signal, toolUseID: '2' })).toMatchObject({
+      behavior: 'deny',
+      toolUseID: '2',
+    });
+    expect(onStepFinish).toHaveBeenCalledWith({ stepIndex: 1, stepBudget: 1 });
+  });
+
+  it('maps max-turn terminal reasons to budget', () => {
+    expect(mapClaudeCodeStopReason(resultMessage({ subtype: 'error_max_turns' }))).toBe('budget');
+    expect(mapClaudeCodeStopReason(resultMessage({ terminal_reason: 'max_turns' }))).toBe('budget');
+    expect(mapClaudeCodeStopReason(resultMessage({ stop_reason: 'max_turns' }))).toBe('budget');
+    expect(mapClaudeCodeStopReason(resultMessage({ subtype: 'success', terminal_reason: 'completed' }))).toBe('natural');
+    expect(mapClaudeCodeStopReason(resultMessage({ subtype: 'error_during_execution' }))).toBe('error');
+  });
+
+  it('auth probe uses isolation options and a scrubbed env', async () => {
+    const query = vi.fn(() => stream([initMessage(), resultMessage({ result: 'ok' })]));
+
+    await expect(
+      runClaudeCodeAuthProbe({ projectDir: '/tmp/project', model: 'sonnet', query, env: { ANTHROPIC_API_KEY: 'sk-ant-test' } }),
+    ).resolves.toEqual({ ok: true });
+    expect(query.mock.calls[0][0].options).toMatchObject({
+      settingSources: [],
+      skills: [],
+      plugins: [],
+      tools: [],
+      allowedTools: [],
+      persistSession: false,
+      env: expect.not.objectContaining({ ANTHROPIC_API_KEY: 'sk-ant-test' }),
+    });
+  });
+});
diff --git a/packages/context/src/llm/claude-code-runtime.ts b/packages/context/src/llm/claude-code-runtime.ts
new file mode 100644
index 00000000..2e7e74e5
--- /dev/null
+++ b/packages/context/src/llm/claude-code-runtime.ts
@@ -0,0 +1,301 @@
+import {
+  createSdkMcpServer,
+  query as defaultQuery,
+  type Options,
+  type SDKMessage,
+  type SDKResultMessage,
+} from '@anthropic-ai/claude-agent-sdk';
+import { z } from 'zod';
+import { noopLogger, type KtxLogger } from '../core/index.js';
+import { createKtxClaudeCodeEnv } from './claude-code-env.js';
+import { resolveClaudeCodeModel } from './claude-code-models.js';
+import { createClaudeSdkTools, mcpToolIds } from './runtime-tools.js';
+import type {
+  KtxGenerateObjectInput,
+  KtxGenerateTextInput,
+  KtxLlmRuntimePort,
+  KtxRuntimeToolSet,
+  RunLoopParams,
+  RunLoopResult,
+  RunLoopStopReason,
+} from './runtime-port.js';
+
+/** Shape of the SDK `query` entry point; injectable so callers can substitute their own message stream. */
+type QueryFn = (params: Parameters<typeof defaultQuery>[0]) => AsyncIterable<SDKMessage>;
+
+/** Constructor dependencies of {@link ClaudeCodeKtxLlmRuntime}; `query`, `env` and `logger` have defaults. */
+export interface ClaudeCodeKtxLlmRuntimeDeps {
+  projectDir: string;
+  modelSlots: { default: string } & Partial<Record<string, string>>;
+  query?: QueryFn;
+  env?: NodeJS.ProcessEnv;
+  logger?: KtxLogger;
+}
+
+/** Built-in tool names passed as `disallowedTools` on every query. */
+const BUILTIN_TOOLS = [
+  'Agent',
+  'Task',
+  'AskUserQuestion',
+  'Bash',
+  'Read',
+  'Edit',
+  'Write',
+  'Glob',
+  'Grep',
+  'WebFetch',
+  'WebSearch',
+  'TodoWrite',
+];
+
+/** Type guard for the `result` message of a query stream. */
+function isResult(message: SDKMessage): message is SDKResultMessage {
+  return message.type === 'result';
+}
+
+/** Converts a non-success result into an Error listing its `errors`; returns undefined for a success result. */
+function resultError(result: SDKResultMessage): Error | undefined {
+  if (result.subtype === 'success') {
+    return undefined;
+  }
+  const details = result.errors.length > 0 ? `: ${result.errors.join('; ')}` : '';
+  return new Error(`Claude Code query failed (${result.subtype})${details}`);
+}
+
+/** Maps a result to a stop reason: max-turn signals are 'budget', a success that completed is 'natural', the rest 'error'. */
+export function mapClaudeCodeStopReason(result: SDKResultMessage): RunLoopStopReason {
+  if (result.subtype === 'error_max_turns') {
+    return 'budget';
+  }
+  if (result.terminal_reason === 'max_turns' || result.stop_reason === 'max_turns') {
+    return 'budget';
+  }
+  if (result.subtype === 'success') {
+    return result.terminal_reason && result.terminal_reason !== 'completed' ? 'error' : 'natural';
+  }
+  return 'error';
+}
+
+/** Converts a Zod schema to a draft-7 JSON Schema object. */
+function jsonSchema(schema: z.ZodType): Record<string, unknown> {
+  return z.toJSONSchema(schema, { target: 'draft-7' }) as Record<string, unknown>;
+}
+
+/** Resolves the model configured for `role`, falling back to the `default` slot when the role has none. */
+function modelForRole(modelSlots: ClaudeCodeKtxLlmRuntimeDeps['modelSlots'], role: string): string {
+  return resolveClaudeCodeModel(modelSlots[role] ?? modelSlots.default);
+}
+
+/** Throws when an init message reports tools outside `allowedToolIds`, or any slash commands, skills or plugins. */
+function assertInitIsolation(message: SDKMessage, allowedToolIds: Set<string>): void {
+  if (message.type !== 'system' || message.subtype !== 'init') {
+    return;
+  }
+  const unexpectedTools = message.tools.filter((toolName) => !allowedToolIds.has(toolName));
+  if (
+    unexpectedTools.length > 0 ||
+    message.slash_commands.length > 0 ||
+    message.skills.length > 0 ||
+    message.plugins.length > 0
+  ) {
+    throw new Error(
+      `Claude Code runtime isolation failed: tools=${unexpectedTools.join(',') || '(none)'} slash_commands=${
+        message.slash_commands.length
+      } skills=${message.skills.length} plugins=${message.plugins.length}`,
+    );
+  }
+}
+
+/** Builds the query options shared by every call: no settings, skills or plugins, only KTX MCP tools, scrubbed env. */
+function baseOptions(input: {
+  projectDir: string;
+  model: string;
+  env: NodeJS.ProcessEnv | undefined;
+  maxTurns: number;
+  tools?: KtxRuntimeToolSet;
+}): Options {
+  const toolIds = mcpToolIds(input.tools ?? {});
+  const allowedToolIds = new Set<string>(toolIds);
+  return {
+    cwd: input.projectDir,
+    model: input.model,
+    maxTurns: input.maxTurns,
+    settingSources: [],
+    skills: [],
+    plugins: [],
+    tools: [],
+    allowedTools: toolIds,
+    disallowedTools: BUILTIN_TOOLS,
+    canUseTool: async (toolName, _toolInput, options) =>
+      allowedToolIds.has(toolName)
+        ? { behavior: 'allow', toolUseID: options.toolUseID }
+        : {
+            behavior: 'deny',
+            message: `KTX claude-code runtime only permits current KTX MCP tools; denied ${toolName}.`,
+            toolUseID: options.toolUseID,
+          },
+    permissionMode: 'dontAsk',
+    persistSession: false,
+    env: createKtxClaudeCodeEnv(input.env),
+    ...(input.tools && Object.keys(input.tools).length > 0
+      ? { mcpServers: { ktx: createSdkMcpServer({ name: 'ktx', tools: createClaudeSdkTools(input.tools) }) } }
+      : {}),
+  };
+}
+
+/** Drains the query stream, checking init isolation and reporting top-level assistant turns; returns the last result. */
+async function collectResult(params: {
+  query: QueryFn;
+  prompt: string;
+  options: Options;
+  allowedToolIds: Set<string>;
+  onAssistantTurn?: () => Promise<void>;
+}): Promise<SDKResultMessage> {
+  let result: SDKResultMessage | undefined;
+  for await (const message of params.query({ prompt: params.prompt, options: params.options })) {
+    assertInitIsolation(message, params.allowedToolIds);
+    if (message.type === 'assistant' && message.parent_tool_use_id === null) {
+      await params.onAssistantTurn?.();
+    }
+    if (isResult(message)) {
+      result = message;
+    }
+  }
+  if (!result) {
+    throw new Error('Claude Code query returned no result message');
+  }
+  return result;
+}
+
+/** {@link KtxLlmRuntimePort} implementation that runs every request through the Claude Agent SDK `query` function. */
+export class ClaudeCodeKtxLlmRuntime implements KtxLlmRuntimePort {
+  private readonly runQuery: QueryFn;
+  private readonly logger: KtxLogger;
+
+  constructor(private readonly deps: ClaudeCodeKtxLlmRuntimeDeps) {
+    this.runQuery = deps.query ?? defaultQuery;
+    this.logger = deps.logger ?? noopLogger;
+  }
+
+  async generateText(input: KtxGenerateTextInput): Promise<string> {
+    const options = baseOptions({
+      projectDir: this.deps.projectDir,
+      model: modelForRole(this.deps.modelSlots, input.role),
+      env: this.deps.env,
+      maxTurns: 1,
+      tools: input.tools,
+    });
+    const result = await collectResult({
+      query: this.runQuery,
+      prompt: [input.system, input.prompt].filter(Boolean).join('\n\n'),
+      options,
+      allowedToolIds: new Set(mcpToolIds(input.tools ?? {})),
+    });
+    const error = resultError(result);
+    if (error) {
+      throw error;
+    }
+    return result.result;
+  }
+
+  // NOTE(review): the type parameters below are reconstructed; confirm them against KtxLlmRuntimePort.generateObject.
+  async generateObject<TOutput, TSchema extends z.ZodType<TOutput>>(
+    input: KtxGenerateObjectInput<TOutput, TSchema>,
+  ): Promise<TOutput> {
+    const options = {
+      ...baseOptions({
+        projectDir: this.deps.projectDir,
+        model: modelForRole(this.deps.modelSlots, input.role),
+        env: this.deps.env,
+        maxTurns: 1,
+        tools: input.tools,
+      }),
+      outputFormat: { type: 'json_schema' as const, schema: jsonSchema(input.schema as z.ZodType) },
+    };
+    const result = await collectResult({
+      query: this.runQuery,
+      prompt: [input.system, input.prompt].filter(Boolean).join('\n\n'),
+      options,
+      allowedToolIds: new Set(mcpToolIds(input.tools ?? {})),
+    });
+    const error = resultError(result);
+    if (error) {
+      throw error;
+    }
+    return (input.schema as z.ZodType<TOutput>).parse(result.structured_output);
+  }
+
+  async runAgentLoop(params: RunLoopParams): Promise<RunLoopResult> {
+    let stepIndex = 0;
+    try {
+      const options = baseOptions({
+        projectDir: this.deps.projectDir,
+        model: modelForRole(this.deps.modelSlots, params.modelRole),
+        env: this.deps.env,
+        maxTurns: params.stepBudget,
+        tools: params.toolSet,
+      });
+      const result = await collectResult({
+        query: this.runQuery,
+        prompt: params.userPrompt,
+        options: { ...options, systemPrompt: params.systemPrompt },
+        allowedToolIds: new Set(mcpToolIds(params.toolSet)),
+        onAssistantTurn: async () => {
+          stepIndex += 1;
+          if (!params.onStepFinish) {
+            return;
+          }
+          try {
+            await params.onStepFinish({ stepIndex, stepBudget: params.stepBudget });
+          } catch (error) {
+            this.logger.warn(
+              `[claude-code-runner] onStepFinish callback threw; ignoring: ${
+                error instanceof Error ? error.message : String(error)
+              }`,
+            );
+          }
+        },
+      });
+      const stopReason = mapClaudeCodeStopReason(result);
+      const error = resultError(result);
+      return { stopReason, ...(stopReason === 'error' && error ? { error } : {}) };
+    } catch (error) {
+      const err = error instanceof Error ? error : new Error(String(error));
+      return { stopReason: 'error', error: err };
+    }
+  }
+}
+
+/** Runs a one-turn query without tools and reports whether it finished successfully; never throws. */
+export async function runClaudeCodeAuthProbe(input: {
+  projectDir: string;
+  model: string;
+  query?: QueryFn;
+  env?: NodeJS.ProcessEnv;
+}): Promise<{ ok: true } | { ok: false; message: string }> {
+  try {
+    const options = baseOptions({
+      projectDir: input.projectDir,
+      model: resolveClaudeCodeModel(input.model),
+      env: input.env,
+      maxTurns: 1,
+    });
+    const result = await collectResult({
+      query: input.query ?? defaultQuery,
+      prompt: 'Reply with exactly: ok',
+      options,
+      allowedToolIds: new Set<string>(),
+    });
+    const error = resultError(result);
+    if (error) {
+      throw error;
+    }
+    return { ok: true };
+  } catch (error) {
+    const message = error instanceof Error ? error.message : String(error);
+    return {
+      ok: false,
+      message: `Claude Code authentication is not usable. Authenticate Claude Code locally with the Claude Code CLI, then rerun setup or the command. ${message}`,
+    };
+  }
+}
diff --git a/packages/context/src/llm/index.ts b/packages/context/src/llm/index.ts
index 47a2f363..bf280604 100644
--- a/packages/context/src/llm/index.ts
+++ b/packages/context/src/llm/index.ts
@@ -1,6 +1,9 @@
 export { KtxIngestEmbeddingPortAdapter, KtxScanEmbeddingPortAdapter } from './embedding-port.js';
 export { AiSdkKtxLlmRuntime } from './ai-sdk-runtime.js';
 export type { AgentTelemetryPort, AiSdkKtxLlmRuntimeDeps } from './ai-sdk-runtime.js';
+export { createKtxClaudeCodeEnv, CLAUDE_CODE_PROVIDER_ENV_DENYLIST } from './claude-code-env.js';
+export { resolveClaudeCodeModel } from './claude-code-models.js';
+export { ClaudeCodeKtxLlmRuntime, mapClaudeCodeStopReason, runClaudeCodeAuthProbe } from './claude-code-runtime.js';
 export { generateKtxObject, generateKtxText } from './generation.js';
 export type {
   AgentRunnerPort,