mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-07 07:55:13 +02:00
fix: report claude sdk tool failures
This commit is contained in:
parent
3b7b21d0fd
commit
0ce798de68
4 changed files with 157 additions and 2 deletions
|
|
@ -11,6 +11,14 @@ export interface RunLoopStepInfo {
|
|||
stepBudget: number;
|
||||
}
|
||||
|
||||
/**
 * Describes a single failed tool call, as delivered to
 * `RunLoopParams.onToolFailure`.
 */
export interface RunLoopToolFailure {
  /** Name of the tool whose invocation failed. */
  toolName: string;

  /** Input the tool was called with; left untyped because it may not have passed validation. */
  input: unknown;

  /** Error text describing the failure. */
  error: string;

  /** Identifier of the failing tool call, when the runner can supply one. */
  toolCallId?: string;

  /** Duration of the failed call in milliseconds, when the runner reports it. */
  durationMs?: number;
}
|
||||
|
||||
export interface RunLoopParams {
|
||||
modelRole: KtxModelRole;
|
||||
systemPrompt: string;
|
||||
|
|
@ -19,6 +27,7 @@ export interface RunLoopParams {
|
|||
stepBudget: number;
|
||||
telemetryTags: Record<string, string>;
|
||||
onStepFinish?: (info: RunLoopStepInfo) => void | Promise<void>;
|
||||
onToolFailure?: (failure: RunLoopToolFailure) => void | Promise<void>;
|
||||
}
|
||||
|
||||
export interface RunLoopResult {
|
||||
|
|
|
|||
|
|
@ -127,4 +127,103 @@ describe('ClaudeAgentSdkRunnerService', () => {
|
|||
message: 'Only KTX MCP tools are available in this session.',
|
||||
});
|
||||
});
|
||||
|
||||
it('reports SDK tool failures through the run-loop callback', async () => {
|
||||
const query = vi.fn(() =>
|
||||
asyncMessages([{ type: 'result', subtype: 'success', terminal_reason: 'completed', result: 'done' }]),
|
||||
);
|
||||
const failures: unknown[] = [];
|
||||
const runner = new ClaudeAgentSdkRunnerService({
|
||||
projectDir: '/tmp/project',
|
||||
modelSlots: {},
|
||||
query: query as never,
|
||||
});
|
||||
|
||||
await runner.runLoop({
|
||||
modelRole: 'default',
|
||||
systemPrompt: 'system',
|
||||
userPrompt: 'user',
|
||||
stepBudget: 1,
|
||||
telemetryTags: {},
|
||||
toolSet: {},
|
||||
onToolFailure: async (failure) => {
|
||||
failures.push(failure);
|
||||
},
|
||||
});
|
||||
|
||||
const options = (query as any).mock.calls[0][0].options;
|
||||
const hook = options.hooks.PostToolUseFailure[0].hooks[0];
|
||||
const output = await hook(
|
||||
{
|
||||
hook_event_name: 'PostToolUseFailure',
|
||||
session_id: 'session-1',
|
||||
transcript_path: '/tmp/project/transcript.jsonl',
|
||||
cwd: '/tmp/project',
|
||||
tool_name: 'mcp__ktx__read_raw_span',
|
||||
tool_input: { path: 42 },
|
||||
tool_use_id: 'tool-1',
|
||||
error: 'Input validation failed: expected path to be a string',
|
||||
duration_ms: 12,
|
||||
},
|
||||
'tool-1',
|
||||
{ signal: new AbortController().signal },
|
||||
);
|
||||
|
||||
expect(output).toEqual({
|
||||
continue: true,
|
||||
hookSpecificOutput: { hookEventName: 'PostToolUseFailure' },
|
||||
});
|
||||
expect(failures).toEqual([
|
||||
{
|
||||
toolName: 'read_raw_span',
|
||||
input: { path: 42 },
|
||||
toolCallId: 'tool-1',
|
||||
error: 'Input validation failed: expected path to be a string',
|
||||
durationMs: 12,
|
||||
},
|
||||
]);
|
||||
});
|
||||
|
||||
it('passes SDK tool-use identifiers to KTX tool execution', async () => {
|
||||
const query = vi.fn(() =>
|
||||
asyncMessages([{ type: 'result', subtype: 'success', terminal_reason: 'completed', result: 'done' }]),
|
||||
);
|
||||
const execute = vi.fn(async ({ value }: { value: string }) => ({
|
||||
markdown: `pong ${value}`,
|
||||
structured: { value },
|
||||
}));
|
||||
const toolMock = vi.fn((name, description, inputSchema, handler) => ({
|
||||
name,
|
||||
description,
|
||||
inputSchema,
|
||||
handler,
|
||||
}));
|
||||
const runner = new ClaudeAgentSdkRunnerService({
|
||||
projectDir: '/tmp/project',
|
||||
modelSlots: {},
|
||||
query: query as never,
|
||||
tool: toolMock as never,
|
||||
});
|
||||
|
||||
await runner.runLoop({
|
||||
modelRole: 'default',
|
||||
systemPrompt: 'system',
|
||||
userPrompt: 'user',
|
||||
stepBudget: 1,
|
||||
telemetryTags: {},
|
||||
toolSet: {
|
||||
ping: createAgentTool({
|
||||
name: 'ping',
|
||||
description: 'Ping',
|
||||
inputSchema: z.object({ value: z.string() }),
|
||||
execute,
|
||||
}),
|
||||
},
|
||||
});
|
||||
|
||||
const handler = toolMock.mock.calls[0][3];
|
||||
await handler({ value: 'Ada' }, { toolUseID: 'tool-42' });
|
||||
|
||||
expect(execute).toHaveBeenCalledWith({ value: 'Ada' }, { toolCallId: 'tool-42' });
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ import {
|
|||
query,
|
||||
tool,
|
||||
type CanUseTool,
|
||||
type HookCallbackMatcher,
|
||||
type SDKMessage,
|
||||
type SDKResultMessage,
|
||||
} from '@anthropic-ai/claude-agent-sdk';
|
||||
|
|
@ -38,6 +39,19 @@ const BUILT_IN_TOOLS = [
|
|||
'Write',
|
||||
];
|
||||
|
||||
/**
 * Removes a leading `mcp__ktx__` from an SDK-reported tool name, so that
 * `mcp__ktx__read_raw_span` becomes `read_raw_span`.
 *
 * @param toolName - Tool name as reported by the SDK.
 * @returns The name without the prefix, or `toolName` unchanged when it does
 *   not start with the prefix.
 */
function normalizeSdkToolName(toolName: string): string {
  const prefix = 'mcp__ktx__';
  if (!toolName.startsWith(prefix)) {
    return toolName;
  }
  return toolName.slice(prefix.length);
}
|
||||
|
||||
/**
 * Extracts a tool-call identifier from the opaque `extra` argument passed to
 * a tool handler.
 *
 * The keys `toolUseID`, `tool_use_id` and `toolCallId` are probed in that
 * order, and the first one holding a non-empty string wins. A key that is
 * present but holds a non-string or empty value is skipped rather than
 * ending the search, so a malformed value under one spelling does not hide a
 * usable id under another.
 *
 * @param extra - Handler context of unknown shape; anything that is not a
 *   non-null object yields `undefined`.
 * @returns The identifier, or `undefined` when none of the keys holds a
 *   non-empty string.
 */
function sdkToolCallId(extra: unknown): string | undefined {
  if (extra === null || typeof extra !== 'object') {
    return undefined;
  }

  const record = extra as Record<string, unknown>;
  for (const key of ['toolUseID', 'tool_use_id', 'toolCallId'] as const) {
    const candidate = record[key];
    // An empty id cannot identify a call, so keep looking instead of returning it.
    if (typeof candidate === 'string' && candidate.length > 0) {
      return candidate;
    }
  }
  return undefined;
}
|
||||
|
||||
export interface ClaudeAgentSdkRunnerServiceDeps {
|
||||
projectDir: string;
|
||||
modelSlots: Partial<Record<KtxModelRole, string>>;
|
||||
|
|
@ -75,6 +89,7 @@ export class ClaudeAgentSdkRunnerService implements AgentRunnerPort {
|
|||
private async consumeQuery(params: RunLoopParams): Promise<SDKResultMessage | undefined> {
|
||||
let result: SDKResultMessage | undefined;
|
||||
let stepIndex = 0;
|
||||
const hooks = this.toolFailureHooks(params);
|
||||
const session = this.query({
|
||||
prompt: params.userPrompt,
|
||||
options: {
|
||||
|
|
@ -96,6 +111,7 @@ export class ClaudeAgentSdkRunnerService implements AgentRunnerPort {
|
|||
disallowedTools: BUILT_IN_TOOLS,
|
||||
permissionMode: 'dontAsk',
|
||||
canUseTool: this.canUseKtxTool,
|
||||
...(hooks ? { hooks } : {}),
|
||||
},
|
||||
});
|
||||
|
||||
|
|
@ -118,9 +134,39 @@ export class ClaudeAgentSdkRunnerService implements AgentRunnerPort {
|
|||
return model ? { model } : {};
|
||||
}
|
||||
|
||||
/**
 * Builds the `hooks` option that forwards tool failures to
 * `params.onToolFailure`.
 *
 * @param params - Run-loop parameters; only `onToolFailure` is read.
 * @returns `undefined` when no `onToolFailure` callback is set, otherwise a
 *   `PostToolUseFailure` entry containing a single hook.
 */
private toolFailureHooks(
  params: RunLoopParams,
): Partial<Record<'PostToolUseFailure', HookCallbackMatcher[]>> | undefined {
  if (params.onToolFailure) {
    const reportFailure: HookCallbackMatcher['hooks'][number] = async (event) => {
      if (event.hook_event_name === 'PostToolUseFailure') {
        // Translate the SDK's snake_case payload into the run-loop failure shape;
        // the callback is awaited before the hook answers.
        await params.onToolFailure?.({
          toolName: normalizeSdkToolName(event.tool_name),
          input: event.tool_input,
          toolCallId: event.tool_use_id,
          error: event.error,
          // durationMs is only included when the payload carries a numeric duration.
          ...(typeof event.duration_ms === 'number' ? { durationMs: event.duration_ms } : {}),
        });
        return {
          continue: true,
          hookSpecificOutput: { hookEventName: 'PostToolUseFailure' as const },
        };
      }

      // Any other hook event is acknowledged without notifying the callback.
      return { continue: true };
    };

    const matcher: HookCallbackMatcher = { hooks: [reportFailure] };
    return { PostToolUseFailure: [matcher] };
  }

  // No listener registered, so no hooks are configured.
  return undefined;
}
|
||||
|
||||
/**
 * Wraps a KTX agent tool definition as an SDK tool using the `this.tool`
 * factory.
 *
 * The handler validates the raw arguments with the definition's
 * `inputSchema`, runs `definition.execute`, and returns the output as a
 * single text content item.
 *
 * @param definition - Tool definition supplying name, description, input
 *   schema and `execute`.
 * @returns Whatever `this.tool` returns for the wrapped handler.
 */
private toSdkTool(definition: AgentToolDefinition) {
  return this.tool(definition.name, definition.description, definition.inputSchema.shape, async (args, extra) => {
    // Pass the tool-use id through to execution when `extra` carries one.
    const toolCallId = sdkToolCallId(extra);
    const output = await definition.execute(definition.inputSchema.parse(args), {
      // The key is omitted entirely (not set to undefined) when no id is available.
      ...(toolCallId ? { toolCallId } : {}),
    });
    return { content: [{ type: 'text' as const, text: agentToolOutputToText(output) }] };
  });
}
|
||||
|
|
|
|||
|
|
@ -10,5 +10,6 @@ export type {
|
|||
RunLoopResult,
|
||||
RunLoopStepInfo,
|
||||
RunLoopStopReason,
|
||||
RunLoopToolFailure,
|
||||
} from './agent-runner.service.js';
|
||||
export { AgentRunnerService } from './agent-runner.service.js';
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue