diff --git a/packages/context/src/agent/agent-runner.service.ts b/packages/context/src/agent/agent-runner.service.ts index aec2b9cf..90672c6d 100644 --- a/packages/context/src/agent/agent-runner.service.ts +++ b/packages/context/src/agent/agent-runner.service.ts @@ -11,6 +11,14 @@ export interface RunLoopStepInfo { stepBudget: number; } +export interface RunLoopToolFailure { + toolName: string; + input: unknown; + toolCallId?: string; + error: string; + durationMs?: number; +} + export interface RunLoopParams { modelRole: KtxModelRole; systemPrompt: string; @@ -19,6 +27,7 @@ export interface RunLoopParams { stepBudget: number; telemetryTags: Record; onStepFinish?: (info: RunLoopStepInfo) => void | Promise; + onToolFailure?: (failure: RunLoopToolFailure) => void | Promise; } export interface RunLoopResult { diff --git a/packages/context/src/agent/claude-agent-sdk-runner.service.test.ts b/packages/context/src/agent/claude-agent-sdk-runner.service.test.ts index abe06cb1..f4e6dcbb 100644 --- a/packages/context/src/agent/claude-agent-sdk-runner.service.test.ts +++ b/packages/context/src/agent/claude-agent-sdk-runner.service.test.ts @@ -127,4 +127,103 @@ describe('ClaudeAgentSdkRunnerService', () => { message: 'Only KTX MCP tools are available in this session.', }); }); + + it('reports SDK tool failures through the run-loop callback', async () => { + const query = vi.fn(() => + asyncMessages([{ type: 'result', subtype: 'success', terminal_reason: 'completed', result: 'done' }]), + ); + const failures: unknown[] = []; + const runner = new ClaudeAgentSdkRunnerService({ + projectDir: '/tmp/project', + modelSlots: {}, + query: query as never, + }); + + await runner.runLoop({ + modelRole: 'default', + systemPrompt: 'system', + userPrompt: 'user', + stepBudget: 1, + telemetryTags: {}, + toolSet: {}, + onToolFailure: async (failure) => { + failures.push(failure); + }, + }); + + const options = (query as any).mock.calls[0][0].options; + const hook = options.hooks.PostToolUseFailure[0].hooks[0]; + const output = await hook( + { + hook_event_name: 'PostToolUseFailure', + session_id: 'session-1', + transcript_path: '/tmp/project/transcript.jsonl', + cwd: '/tmp/project', + tool_name: 'mcp__ktx__read_raw_span', + tool_input: { path: 42 }, + tool_use_id: 'tool-1', + error: 'Input validation failed: expected path to be a string', + duration_ms: 12, + }, + 'tool-1', + { signal: new AbortController().signal }, + ); + + expect(output).toEqual({ + continue: true, + hookSpecificOutput: { hookEventName: 'PostToolUseFailure' }, + }); + expect(failures).toEqual([ + { + toolName: 'read_raw_span', + input: { path: 42 }, + toolCallId: 'tool-1', + error: 'Input validation failed: expected path to be a string', + durationMs: 12, + }, + ]); + }); + + it('passes SDK tool-use identifiers to KTX tool execution', async () => { + const query = vi.fn(() => + asyncMessages([{ type: 'result', subtype: 'success', terminal_reason: 'completed', result: 'done' }]), + ); + const execute = vi.fn(async ({ value }: { value: string }) => ({ + markdown: `pong ${value}`, + structured: { value }, + })); + const toolMock = vi.fn((name, description, inputSchema, handler) => ({ + name, + description, + inputSchema, + handler, + })); + const runner = new ClaudeAgentSdkRunnerService({ + projectDir: '/tmp/project', + modelSlots: {}, + query: query as never, + tool: toolMock as never, + }); + + await runner.runLoop({ + modelRole: 'default', + systemPrompt: 'system', + userPrompt: 'user', + stepBudget: 1, + telemetryTags: {}, + toolSet: { + ping: createAgentTool({ + name: 'ping', + description: 'Ping', + inputSchema: z.object({ value: z.string() }), + execute, + }), + }, + }); + + const handler = toolMock.mock.calls[0][3]; + await handler({ value: 'Ada' }, { toolUseID: 'tool-42' }); + + expect(execute).toHaveBeenCalledWith({ value: 'Ada' }, { toolCallId: 'tool-42' }); + }); }); diff --git a/packages/context/src/agent/claude-agent-sdk-runner.service.ts b/packages/context/src/agent/claude-agent-sdk-runner.service.ts index 8b3e1cbf..39679585 100644 --- a/packages/context/src/agent/claude-agent-sdk-runner.service.ts +++ b/packages/context/src/agent/claude-agent-sdk-runner.service.ts @@ -3,6 +3,7 @@ import { query, tool, type CanUseTool, + type HookCallbackMatcher, type SDKMessage, type SDKResultMessage, } from '@anthropic-ai/claude-agent-sdk'; @@ -38,6 +39,19 @@ const BUILT_IN_TOOLS = [ 'Write', ]; +function normalizeSdkToolName(toolName: string): string { + return toolName.startsWith('mcp__ktx__') ? toolName.slice('mcp__ktx__'.length) : toolName; +} + +function sdkToolCallId(extra: unknown): string | undefined { + if (!extra || typeof extra !== 'object') { + return undefined; + } + const record = extra as Record; + const id = record.toolUseID ?? record.tool_use_id ?? record.toolCallId; + return typeof id === 'string' ? id : undefined; +} + export interface ClaudeAgentSdkRunnerServiceDeps { projectDir: string; modelSlots: Partial>; @@ -75,6 +89,7 @@ export class ClaudeAgentSdkRunnerService implements AgentRunnerPort { private async consumeQuery(params: RunLoopParams): Promise { let result: SDKResultMessage | undefined; let stepIndex = 0; + const hooks = this.toolFailureHooks(params); const session = this.query({ prompt: params.userPrompt, options: { @@ -96,6 +111,7 @@ export class ClaudeAgentSdkRunnerService implements AgentRunnerPort { disallowedTools: BUILT_IN_TOOLS, permissionMode: 'dontAsk', canUseTool: this.canUseKtxTool, + ...(hooks ? { hooks } : {}), }, }); @@ -118,9 +134,39 @@ export class ClaudeAgentSdkRunnerService implements AgentRunnerPort { return model ? { model } : {}; } + private toolFailureHooks( + params: RunLoopParams, + ): Partial> | undefined { + if (!params.onToolFailure) { + return undefined; + } + + const hook: HookCallbackMatcher['hooks'][number] = async (input) => { + if (input.hook_event_name !== 'PostToolUseFailure') { + return { continue: true }; + } + await params.onToolFailure?.({ + toolName: normalizeSdkToolName(input.tool_name), + input: input.tool_input, + toolCallId: input.tool_use_id, + error: input.error, + ...(typeof input.duration_ms === 'number' ? { durationMs: input.duration_ms } : {}), + }); + return { + continue: true, + hookSpecificOutput: { hookEventName: 'PostToolUseFailure' as const }, + }; + }; + + return { PostToolUseFailure: [{ hooks: [hook] }] }; + } + private toSdkTool(definition: AgentToolDefinition) { - return this.tool(definition.name, definition.description, definition.inputSchema.shape, async (args) => { - const output = await definition.execute(definition.inputSchema.parse(args), {}); + return this.tool(definition.name, definition.description, definition.inputSchema.shape, async (args, extra) => { + const toolCallId = sdkToolCallId(extra); + const output = await definition.execute(definition.inputSchema.parse(args), { + ...(toolCallId ? { toolCallId } : {}), + }); return { content: [{ type: 'text' as const, text: agentToolOutputToText(output) }] }; }); } diff --git a/packages/context/src/agent/index.ts b/packages/context/src/agent/index.ts index db808ccf..8f998b6d 100644 --- a/packages/context/src/agent/index.ts +++ b/packages/context/src/agent/index.ts @@ -10,5 +10,6 @@ export type { RunLoopResult, RunLoopStepInfo, RunLoopStopReason, + RunLoopToolFailure, } from './agent-runner.service.js'; export { AgentRunnerService } from './agent-runner.service.js';