fix: report claude sdk tool failures

This commit is contained in:
Andrey Avtomonov 2026-05-15 13:13:14 +02:00
parent 3b7b21d0fd
commit 0ce798de68
4 changed files with 157 additions and 2 deletions

View file

@ -11,6 +11,14 @@ export interface RunLoopStepInfo {
stepBudget: number;
}
/**
 * Describes one failed tool call, as delivered to the optional
 * `RunLoopParams.onToolFailure` callback.
 */
export interface RunLoopToolFailure {
/** Name of the tool whose call failed. */
toolName: string;
/** Input associated with the failed call; untyped, so consumers must narrow before use. */
input: unknown;
/** Identifier of the failed tool call, when the runner has one. */
toolCallId?: string;
/** Error text describing the failure. */
error: string;
/** Duration of the failed call in milliseconds, when the runner reports one. */
durationMs?: number;
}
export interface RunLoopParams {
modelRole: KtxModelRole;
systemPrompt: string;
@ -19,6 +27,7 @@ export interface RunLoopParams {
stepBudget: number;
telemetryTags: Record<string, string>;
onStepFinish?: (info: RunLoopStepInfo) => void | Promise<void>;
onToolFailure?: (failure: RunLoopToolFailure) => void | Promise<void>;
}
export interface RunLoopResult {

View file

@ -127,4 +127,103 @@ describe('ClaudeAgentSdkRunnerService', () => {
message: 'Only KTX MCP tools are available in this session.',
});
});
// Verifies that a PostToolUseFailure hook registered by runLoop forwards the
// failure to the caller-supplied onToolFailure callback.
it('reports SDK tool failures through the run-loop callback', async () => {
// Stubbed SDK query: yields only a single successful result message.
const query = vi.fn(() =>
asyncMessages([{ type: 'result', subtype: 'success', terminal_reason: 'completed', result: 'done' }]),
);
// Collects every payload handed to onToolFailure.
const failures: unknown[] = [];
const runner = new ClaudeAgentSdkRunnerService({
projectDir: '/tmp/project',
modelSlots: {},
query: query as never,
});
await runner.runLoop({
modelRole: 'default',
systemPrompt: 'system',
userPrompt: 'user',
stepBudget: 1,
telemetryTags: {},
toolSet: {},
onToolFailure: async (failure) => {
failures.push(failure);
},
});
// Pull the hook out of the options passed to the first query() call and
// invoke it by hand, since the stubbed query never emits a tool failure itself.
const options = (query as any).mock.calls[0][0].options;
const hook = options.hooks.PostToolUseFailure[0].hooks[0];
const output = await hook(
{
hook_event_name: 'PostToolUseFailure',
session_id: 'session-1',
transcript_path: '/tmp/project/transcript.jsonl',
cwd: '/tmp/project',
tool_name: 'mcp__ktx__read_raw_span',
tool_input: { path: 42 },
tool_use_id: 'tool-1',
error: 'Input validation failed: expected path to be a string',
duration_ms: 12,
},
'tool-1',
{ signal: new AbortController().signal },
);
// The hook must let the session continue after reporting the failure.
expect(output).toEqual({
continue: true,
hookSpecificOutput: { hookEventName: 'PostToolUseFailure' },
});
// The callback receives the tool name without the 'mcp__ktx__' prefix and
// the snake_case hook fields mapped to camelCase.
expect(failures).toEqual([
{
toolName: 'read_raw_span',
input: { path: 42 },
toolCallId: 'tool-1',
error: 'Input validation failed: expected path to be a string',
durationMs: 12,
},
]);
});
// Verifies that the SDK's `toolUseID` (second handler argument) reaches the
// KTX tool's execute() as `toolCallId`.
it('passes SDK tool-use identifiers to KTX tool execution', async () => {
// Stubbed SDK query: yields only a single successful result message.
const query = vi.fn(() =>
asyncMessages([{ type: 'result', subtype: 'success', terminal_reason: 'completed', result: 'done' }]),
);
// Spy standing in for the KTX tool implementation.
const execute = vi.fn(async ({ value }: { value: string }) => ({
markdown: `pong ${value}`,
structured: { value },
}));
// Replacement for the SDK `tool` factory that simply records its arguments,
// so the handler the runner builds can be retrieved and called directly.
const toolMock = vi.fn((name, description, inputSchema, handler) => ({
name,
description,
inputSchema,
handler,
}));
const runner = new ClaudeAgentSdkRunnerService({
projectDir: '/tmp/project',
modelSlots: {},
query: query as never,
tool: toolMock as never,
});
await runner.runLoop({
modelRole: 'default',
systemPrompt: 'system',
userPrompt: 'user',
stepBudget: 1,
telemetryTags: {},
toolSet: {
ping: createAgentTool({
name: 'ping',
description: 'Ping',
inputSchema: z.object({ value: z.string() }),
execute,
}),
},
});
// The handler is the fourth argument of the first toolMock call.
const handler = toolMock.mock.calls[0][3];
await handler({ value: 'Ada' }, { toolUseID: 'tool-42' });
expect(execute).toHaveBeenCalledWith({ value: 'Ada' }, { toolCallId: 'tool-42' });
});
});

View file

@ -3,6 +3,7 @@ import {
query,
tool,
type CanUseTool,
type HookCallbackMatcher,
type SDKMessage,
type SDKResultMessage,
} from '@anthropic-ai/claude-agent-sdk';
@ -38,6 +39,19 @@ const BUILT_IN_TOOLS = [
'Write',
];
/**
 * Strips the `mcp__ktx__` prefix from an SDK tool name.
 *
 * @param toolName - Tool name as reported by the SDK.
 * @returns The name without the prefix, or `toolName` unchanged when it does
 *   not start with the prefix.
 */
function normalizeSdkToolName(toolName: string): string {
  const ktxPrefix = 'mcp__ktx__';
  if (!toolName.startsWith(ktxPrefix)) {
    return toolName;
  }
  return toolName.substring(ktxPrefix.length);
}
/**
 * Extracts a tool-call identifier from the opaque `extra` argument passed to
 * a tool handler.
 *
 * The keys `toolUseID`, `tool_use_id` and `toolCallId` are probed in that
 * order and the first one holding a string wins. A non-string value under an
 * earlier key no longer hides a valid string under a later key.
 *
 * @param extra - Untyped handler context; may be anything.
 * @returns The identifier, or `undefined` when `extra` is not an object or
 *   none of the known keys holds a string.
 */
function sdkToolCallId(extra: unknown): string | undefined {
  if (typeof extra !== 'object' || extra === null) {
    return undefined;
  }
  const record = extra as Record<string, unknown>;
  for (const key of ['toolUseID', 'tool_use_id', 'toolCallId']) {
    const candidate = record[key];
    if (typeof candidate === 'string') {
      return candidate;
    }
  }
  return undefined;
}
export interface ClaudeAgentSdkRunnerServiceDeps {
projectDir: string;
modelSlots: Partial<Record<KtxModelRole, string>>;
@ -75,6 +89,7 @@ export class ClaudeAgentSdkRunnerService implements AgentRunnerPort {
private async consumeQuery(params: RunLoopParams): Promise<SDKResultMessage | undefined> {
let result: SDKResultMessage | undefined;
let stepIndex = 0;
const hooks = this.toolFailureHooks(params);
const session = this.query({
prompt: params.userPrompt,
options: {
@ -96,6 +111,7 @@ export class ClaudeAgentSdkRunnerService implements AgentRunnerPort {
disallowedTools: BUILT_IN_TOOLS,
permissionMode: 'dontAsk',
canUseTool: this.canUseKtxTool,
...(hooks ? { hooks } : {}),
},
});
@ -118,9 +134,39 @@ export class ClaudeAgentSdkRunnerService implements AgentRunnerPort {
return model ? { model } : {};
}
/**
 * Builds the SDK hook configuration that forwards tool failures to
 * `params.onToolFailure`.
 *
 * @param params - Run-loop parameters; only `onToolFailure` is read.
 * @returns `undefined` when no `onToolFailure` callback was supplied,
 *   otherwise a single `PostToolUseFailure` matcher holding one hook.
 */
private toolFailureHooks(
  params: RunLoopParams,
): Partial<Record<'PostToolUseFailure', HookCallbackMatcher[]>> | undefined {
  if (!params.onToolFailure) {
    return undefined;
  }

  const reportFailure: HookCallbackMatcher['hooks'][number] = async (input) => {
    if (input.hook_event_name === 'PostToolUseFailure') {
      // Only attach durationMs when the SDK actually supplied a number.
      const durationField = typeof input.duration_ms === 'number' ? { durationMs: input.duration_ms } : {};
      await params.onToolFailure?.({
        toolName: normalizeSdkToolName(input.tool_name),
        input: input.tool_input,
        toolCallId: input.tool_use_id,
        error: input.error,
        ...durationField,
      });
      return {
        continue: true,
        hookSpecificOutput: { hookEventName: 'PostToolUseFailure' as const },
      };
    }
    // Any other hook event is passed through untouched.
    return { continue: true };
  };

  const failureMatcher: HookCallbackMatcher = { hooks: [reportFailure] };
  return { PostToolUseFailure: [failureMatcher] };
}
/**
 * Wraps a KTX tool definition as an SDK tool via `this.tool`.
 *
 * The handler parses the raw args with the definition's input schema, runs
 * `definition.execute`, and returns the output as a single text content item.
 * When the SDK-supplied `extra` argument carries a tool-use id, that id is
 * passed to `execute` as `toolCallId`.
 */
private toSdkTool(definition: AgentToolDefinition) {
return this.tool(definition.name, definition.description, definition.inputSchema.shape, async (args) => {
const output = await definition.execute(definition.inputSchema.parse(args), {});
return this.tool(definition.name, definition.description, definition.inputSchema.shape, async (args, extra) => {
const toolCallId = sdkToolCallId(extra);
const output = await definition.execute(definition.inputSchema.parse(args), {
...(toolCallId ? { toolCallId } : {}),
});
return { content: [{ type: 'text' as const, text: agentToolOutputToText(output) }] };
});
}

View file

@ -10,5 +10,6 @@ export type {
RunLoopResult,
RunLoopStepInfo,
RunLoopStopReason,
RunLoopToolFailure,
} from './agent-runner.service.js';
export { AgentRunnerService } from './agent-runner.service.js';