fix: report claude sdk tool failures

2026-07-25 12:01:03 +02:00 · 2026-05-15 13:13:14 +02:00 · 2026-05-15 13:13:14 +02:00 · 0ce798de68
commit 0ce798de68
parent 3b7b21d0fd
4 changed files with 157 additions and 2 deletions
--- a/packages/context/src/agent/agent-runner.service.ts
+++ b/packages/context/src/agent/agent-runner.service.ts
@ -11,6 +11,14 @@ export interface RunLoopStepInfo {
  stepBudget: number;
 }

+export interface RunLoopToolFailure { // Payload passed to RunLoopParams.onToolFailure for one failed tool call.
+  toolName: string; // name of the failing tool; the Claude SDK runner reports it without the 'mcp__ktx__' prefix
+  input: unknown; // tool input as received by the runner; not validated or typed here
+  toolCallId?: string; // identifier of the failed tool call, when the runner has one
+  error: string; // failure message reported for the call
+  durationMs?: number; // the Claude SDK runner sets this only when it receives a numeric duration
+}
+
 export interface RunLoopParams {
  modelRole: KtxModelRole;
  systemPrompt: string;
@ -19,6 +27,7 @@ export interface RunLoopParams {
  stepBudget: number;
  telemetryTags: Record<string, string>;
  onStepFinish?: (info: RunLoopStepInfo) => void | Promise<void>;
+  onToolFailure?: (failure: RunLoopToolFailure) => void | Promise<void>;
 }

 export interface RunLoopResult {
--- a/packages/context/src/agent/claude-agent-sdk-runner.service.test.ts
+++ b/packages/context/src/agent/claude-agent-sdk-runner.service.test.ts
@ -127,4 +127,103 @@ describe('ClaudeAgentSdkRunnerService', () => {
      message: 'Only KTX MCP tools are available in this session.',
    });
  });
+
+  it('reports SDK tool failures through the run-loop callback', async () => { // Calls the registered PostToolUseFailure hook by hand and checks what reaches onToolFailure.
+    const query = vi.fn(() => // stubbed SDK query; asyncMessages is a test helper defined outside this hunk
+      asyncMessages([{ type: 'result', subtype: 'success', terminal_reason: 'completed', result: 'done' }]),
+    );
+    const failures: unknown[] = []; // collects every payload handed to onToolFailure
+    const runner = new ClaudeAgentSdkRunnerService({
+      projectDir: '/tmp/project',
+      modelSlots: {},
+      query: query as never,
+    });
+
+    await runner.runLoop({ // the stubbed session only yields a result message, so the hook is triggered manually below
+      modelRole: 'default',
+      systemPrompt: 'system',
+      userPrompt: 'user',
+      stepBudget: 1,
+      telemetryTags: {},
+      toolSet: {},
+      onToolFailure: async (failure) => {
+        failures.push(failure);
+      },
+    });
+
+    const options = (query as any).mock.calls[0][0].options; // options object given to the first query() call
+    const hook = options.hooks.PostToolUseFailure[0].hooks[0]; // first callback of the first PostToolUseFailure matcher
+    const output = await hook(
+      {
+        hook_event_name: 'PostToolUseFailure',
+        session_id: 'session-1',
+        transcript_path: '/tmp/project/transcript.jsonl',
+        cwd: '/tmp/project',
+        tool_name: 'mcp__ktx__read_raw_span', // prefixed name; asserted below as 'read_raw_span'
+        tool_input: { path: 42 },
+        tool_use_id: 'tool-1',
+        error: 'Input validation failed: expected path to be a string',
+        duration_ms: 12,
+      },
+      'tool-1',
+      { signal: new AbortController().signal },
+    );
+
+    expect(output).toEqual({ // the hook must let the session continue
+      continue: true,
+      hookSpecificOutput: { hookEventName: 'PostToolUseFailure' },
+    });
+    expect(failures).toEqual([ // exactly one payload, with snake_case hook fields mapped to camelCase
+      {
+        toolName: 'read_raw_span', // 'mcp__ktx__' prefix removed
+        input: { path: 42 },
+        toolCallId: 'tool-1',
+        error: 'Input validation failed: expected path to be a string',
+        durationMs: 12,
+      },
+    ]);
+  });
+
+  it('passes SDK tool-use identifiers to KTX tool execution', async () => { // Checks that the handler's toolUseID reaches the tool's execute() as toolCallId.
+    const query = vi.fn(() => // stubbed SDK query; asyncMessages is a test helper defined outside this hunk
+      asyncMessages([{ type: 'result', subtype: 'success', terminal_reason: 'completed', result: 'done' }]),
+    );
+    const execute = vi.fn(async ({ value }: { value: string }) => ({ // spy standing in for the tool implementation
+      markdown: `pong ${value}`,
+      structured: { value },
+    }));
+    const toolMock = vi.fn((name, description, inputSchema, handler) => ({ // replaces the SDK tool factory and echoes its arguments so the handler can be retrieved
+      name,
+      description,
+      inputSchema,
+      handler,
+    }));
+    const runner = new ClaudeAgentSdkRunnerService({
+      projectDir: '/tmp/project',
+      modelSlots: {},
+      query: query as never,
+      tool: toolMock as never,
+    });
+
+    await runner.runLoop({
+      modelRole: 'default',
+      systemPrompt: 'system',
+      userPrompt: 'user',
+      stepBudget: 1,
+      telemetryTags: {},
+      toolSet: {
+        ping: createAgentTool({
+          name: 'ping',
+          description: 'Ping',
+          inputSchema: z.object({ value: z.string() }),
+          execute,
+        }),
+      },
+    });
+
+    const handler = toolMock.mock.calls[0][3]; // fourth argument of the first tool() call is the handler
+    await handler({ value: 'Ada' }, { toolUseID: 'tool-42' }); // second argument plays the role of the SDK's extra object
+
+    expect(execute).toHaveBeenCalledWith({ value: 'Ada' }, { toolCallId: 'tool-42' }); // id forwarded under the KTX key name
+  });
 });
--- a/packages/context/src/agent/claude-agent-sdk-runner.service.ts
+++ b/packages/context/src/agent/claude-agent-sdk-runner.service.ts
@ -3,6 +3,7 @@ import {
  query,
  tool,
  type CanUseTool,
+  type HookCallbackMatcher,
  type SDKMessage,
  type SDKResultMessage,
 } from '@anthropic-ai/claude-agent-sdk';
@ -38,6 +39,19 @@ const BUILT_IN_TOOLS = [
  'Write',
 ];

+function normalizeSdkToolName(toolName: string): string { // Removes a leading 'mcp__ktx__' from a tool name; any other name is returned unchanged.
+  return toolName.startsWith('mcp__ktx__') ? toolName.slice('mcp__ktx__'.length) : toolName; // only a prefix match is stripped, and only once
+}
+
+function sdkToolCallId(extra: unknown): string | undefined { // Pulls a tool-call id out of an untyped value; returns undefined when none is found.
+  if (!extra || typeof extra !== 'object') { // falsy values and non-objects cannot carry an id
+    return undefined;
+  }
+  const record = extra as Record<string, unknown>; // safe to index after the object check above
+  const id = record.toolUseID ?? record.tool_use_id ?? record.toolCallId; // the first key that is neither null nor undefined wins
+  return typeof id === 'string' ? id : undefined; // ids of any other type are discarded
+}
+
 export interface ClaudeAgentSdkRunnerServiceDeps {
  projectDir: string;
  modelSlots: Partial<Record<KtxModelRole, string>>;
@ -75,6 +89,7 @@ export class ClaudeAgentSdkRunnerService implements AgentRunnerPort {
  private async consumeQuery(params: RunLoopParams): Promise<SDKResultMessage | undefined> {
    let result: SDKResultMessage | undefined;
    let stepIndex = 0;
+    const hooks = this.toolFailureHooks(params);
    const session = this.query({
      prompt: params.userPrompt,
      options: {
@ -96,6 +111,7 @@ export class ClaudeAgentSdkRunnerService implements AgentRunnerPort {
        disallowedTools: BUILT_IN_TOOLS,
        permissionMode: 'dontAsk',
        canUseTool: this.canUseKtxTool,
+        ...(hooks ? { hooks } : {}),
      },
    });

@ -118,9 +134,39 @@ export class ClaudeAgentSdkRunnerService implements AgentRunnerPort {
    return model ? { model } : {};
  }

+  private toolFailureHooks( // Builds the SDK hooks option that forwards PostToolUseFailure events to params.onToolFailure.
+    params: RunLoopParams,
+  ): Partial<Record<'PostToolUseFailure', HookCallbackMatcher[]>> | undefined {
+    if (!params.onToolFailure) { // no callback supplied: undefined lets the caller leave `hooks` out of the query options
+      return undefined;
+    }
+
+    const hook: HookCallbackMatcher['hooks'][number] = async (input) => {
+      if (input.hook_event_name !== 'PostToolUseFailure') { // any other hook event is passed through untouched
+        return { continue: true };
+      }
+      await params.onToolFailure?.({ // awaited without a try/catch, so a rejecting callback rejects this hook too
+        toolName: normalizeSdkToolName(input.tool_name), // reported without the 'mcp__ktx__' prefix
+        input: input.tool_input,
+        toolCallId: input.tool_use_id,
+        error: input.error,
+        ...(typeof input.duration_ms === 'number' ? { durationMs: input.duration_ms } : {}), // key omitted unless a numeric duration is present
+      });
+      return {
+        continue: true, // reporting never stops the session
+        hookSpecificOutput: { hookEventName: 'PostToolUseFailure' as const },
+      };
+    };
+
+    return { PostToolUseFailure: [{ hooks: [hook] }] }; // a single matcher holding the single callback
+  }
+
  private toSdkTool(definition: AgentToolDefinition) {
-    return this.tool(definition.name, definition.description, definition.inputSchema.shape, async (args) => {
-      const output = await definition.execute(definition.inputSchema.parse(args), {});
+    return this.tool(definition.name, definition.description, definition.inputSchema.shape, async (args, extra) => { // the handler's second argument is inspected for a tool-call id
+      const toolCallId = sdkToolCallId(extra); // undefined when extra holds no string id
+      const output = await definition.execute(definition.inputSchema.parse(args), { // args are parsed with the tool's own schema before execution
+        ...(toolCallId ? { toolCallId } : {}), // key left out when the id is missing or empty
+      });
      return { content: [{ type: 'text' as const, text: agentToolOutputToText(output) }] };
    });
  }
--- a/packages/context/src/agent/index.ts
+++ b/packages/context/src/agent/index.ts
@ -10,5 +10,6 @@ export type {
  RunLoopResult,
  RunLoopStepInfo,
  RunLoopStopReason,
+  RunLoopToolFailure,
 } from './agent-runner.service.js';
 export { AgentRunnerService } from './agent-runner.service.js';