fix: count Claude SDK tool failures in work units

This commit is contained in:
Andrey Avtomonov 2026-05-15 13:14:04 +02:00
parent 0ce798de68
commit 1c3436842f
4 changed files with 106 additions and 2 deletions

View file

@ -912,6 +912,53 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
);
});
it('records SDK tool failures as fatal WorkUnit transcript failures', async () => {
  // Failure payload the mocked agent loop reports through `onToolFailure`.
  // Built fresh on every call so no object is shared between invocations.
  const makeSdkFailure = () => ({
    toolName: 'read_raw_span',
    input: { path: 42 },
    toolCallId: 'schema-1',
    error: 'Input validation failed: expected path to be a string',
    durationMs: 4,
  });

  const deps = makeDeps();
  deps.agentRunner.runLoop.mockImplementation(async (params: any) => {
    // Only loops tagged as WorkUnit loops report the failure; any other loop just finishes.
    const isWorkUnitLoop = params.telemetryTags.operationName === 'ingest-bundle-wu';
    if (isWorkUnitLoop) {
      await params.onToolFailure?.(makeSdkFailure());
    }
    return { stopReason: 'natural' };
  });

  const runner = buildRunner(deps);
  // Replace the staging helpers on the runner with canned results.
  const runnerInternals = runner as any;
  runnerInternals.stageRawFilesStage1 = vi.fn().mockResolvedValue({
    currentHashes: new Map([['a.yml', 'h1']]),
    rawDirInWorktree: 'raw-sources/c1/fake/s',
  });
  runnerInternals.resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x');

  await runner.run({
    jobId: 'j1',
    connectionId: 'c1',
    sourceKey: 'fake',
    trigger: 'upload',
    bundleRef: { kind: 'upload', uploadId: 'upload-x' },
  });

  // The single reported failure must show up both in the unit's transcript
  // summary and in the report's list of failed work units.
  const expectedTranscript = expect.objectContaining({
    unitKey: 'u1',
    toolCallCount: 1,
    errorCount: 1,
    toolNames: ['read_raw_span'],
  });
  const expectedBody = expect.objectContaining({
    failedWorkUnits: ['u1'],
    toolTranscripts: [expectedTranscript],
  });
  expect(deps.reportsRepo.create).toHaveBeenCalledWith(expect.objectContaining({ body: expectedBody }));
});
it('persists WorkUnit unmapped fallback records in the report body', async () => {
const deps = makeDeps();
deps.agentRunner.runLoop.mockImplementation(async (params: any) => {

View file

@ -2,7 +2,7 @@ import { mkdir, readFile, rm, writeFile } from 'node:fs/promises';
import { dirname, join } from 'node:path';
import pLimit from 'p-limit';
import { z } from 'zod';
import { createAgentTool, type AgentToolSet } from '../agent/index.js';
import { createAgentTool, type AgentToolSet, type RunLoopToolFailure } from '../agent/index.js';
import { type KtxLogger, noopLogger } from '../core/index.js';
import type { CaptureSession, MemoryAction } from '../memory/index.js';
import type { SemanticLayerService, SemanticLayerSource, SlValidationDeps } from '../sl/index.js';
@ -401,14 +401,40 @@ export class IngestBundleRunner {
}
const transcriptDir = this.deps.storage.resolveTranscriptDir(job.jobId);
const transcriptSummaries = new Map<string, MutableToolTranscriptSummary>();
// Keys of the error entries that have already been folded into a transcript summary.
const recordedToolErrorKeys = new Set<string>();
/**
 * Builds the de-duplication key for a failed tool call.
 *
 * @returns `wuKey:toolName:toolCallId` when the entry carries both an error and
 *   a tool call id; `null` otherwise (such entries cannot be de-duplicated).
 */
const transcriptErrorKey = (
  entry: Pick<ToolCallLogEntry, 'wuKey' | 'toolName' | 'toolCallId' | 'error'>,
): string | null => {
  if (!entry.error || !entry.toolCallId) {
    return null;
  }
  return `${entry.wuKey}:${entry.toolName}:${entry.toolCallId}`;
};
/**
 * Creates a recorder for tool-call log entries that belong to the transcript at `path`.
 *
 * The returned function remembers the entry's error key (when it has one) and
 * folds the entry into the per-WorkUnit summary, creating that summary on first use.
 */
const recordTranscriptEntry = (path: string) => {
  return (entry: ToolCallLogEntry): void => {
    const dedupeKey = transcriptErrorKey(entry);
    if (dedupeKey) {
      // Remember the error so an SDK-level report of the same call can be skipped.
      recordedToolErrorKeys.add(dedupeKey);
    }

    const existing = transcriptSummaries.get(entry.wuKey);
    const summary = existing ?? createMutableToolTranscriptSummary(entry.wuKey, path);
    recordToolTranscriptEntry(summary, entry);
    transcriptSummaries.set(entry.wuKey, summary);
  };
};
/**
 * Creates a handler that turns a tool failure reported by the agent run loop
 * into a transcript entry for WorkUnit `unitKey`.
 *
 * The failure is skipped when an error entry with the same WorkUnit, tool name
 * and tool call id has already been recorded. Failures without a tool call id
 * have no key and are therefore always recorded.
 *
 * NOTE(review): de-duplication only works when the other error entry was
 * recorded before this handler runs — confirm the callback ordering.
 */
const recordSdkToolFailure = (path: string, unitKey: string) => {
  return (failure: RunLoopToolFailure): void => {
    // `toolCallId` is only included when the failure actually carries one.
    const callIdField = failure.toolCallId ? { toolCallId: failure.toolCallId } : {};
    const entry: ToolCallLogEntry = {
      ts: new Date().toISOString(),
      wuKey: unitKey,
      ...callIdField,
      toolName: failure.toolName,
      // A missing duration is recorded as 0.
      durationMs: failure.durationMs ?? 0,
      input: failure.input,
      error: { message: failure.error },
    };

    const dedupeKey = transcriptErrorKey(entry);
    const alreadyRecorded = dedupeKey !== null && recordedToolErrorKeys.has(dedupeKey);
    if (!alreadyRecorded) {
      recordTranscriptEntry(path)(entry);
    }
  };
};
const overrideReport = await this.loadOverrideReport(job);
const stage1 = ctx?.startPhase(0.08);
@ -779,6 +805,8 @@ export class IngestBundleRunner {
sourceKey: job.sourceKey,
connectionId: job.connectionId,
jobId: job.jobId,
onToolFailure: (unitKey, failure) =>
recordSdkToolFailure(join(transcriptDir, `${unitKey}.jsonl`), unitKey)(failure),
toolFailureCount: (unitKey) => transcriptSummaries.get(unitKey)?.fatalErrorCount ?? 0,
onStepFinish: ({ stepIndex, stepBudget }) => {
memoryFlow?.emit({ type: 'work_unit_step', unitKey: wu.unitKey, stepIndex, stepBudget });

View file

@ -121,6 +121,33 @@ describe('Stage 3 — executeWorkUnit', () => {
expect(deps.resetHardTo).toHaveBeenCalledWith('pre');
});
it('forwards runner tool failures with the current WorkUnit key', async () => {
  // Produces a fresh failure payload for both the emitted and the expected value,
  // so the assertion compares by structure rather than by shared reference.
  const buildFailure = () => ({
    toolName: 'read_raw_span',
    input: { path: 42 },
    toolCallId: 'tool-1',
    error: 'Input validation failed',
    durationMs: 3,
  });

  const onToolFailure = vi.fn();
  const deps = makeDeps();
  deps.onToolFailure = onToolFailure;
  deps.sessionWorktreeGit.revParseHead = vi.fn().mockResolvedValueOnce('pre').mockResolvedValueOnce('post');
  // The mocked loop reports one tool failure and then ends normally.
  deps.agentRunner.runLoop = vi.fn().mockImplementation(async (params: any) => {
    await params.onToolFailure?.(buildFailure());
    return { stopReason: 'natural' };
  });

  await executeWorkUnit(deps, makeWu());

  // The dependency callback receives the unit key in front of the original payload.
  expect(onToolFailure).toHaveBeenCalledWith('u1', buildFailure());
});
it('runner loop thrown exception resets to the pre-WU SHA and marks WU failed', async () => {
const deps = makeDeps();
deps.sessionWorktreeGit.revParseHead = vi.fn().mockResolvedValueOnce('pre').mockResolvedValueOnce('post');

View file

@ -1,4 +1,4 @@
import type { AgentRunnerPort, AgentToolSet } from '@ktx/context/agent';
import type { AgentRunnerPort, AgentToolSet, RunLoopToolFailure } from '@ktx/context/agent';
import type { KtxModelRole } from '@ktx/llm';
import type { CaptureSession, MemoryAction } from '../../memory/index.js';
import { listTouchedSlSources, type TouchedSlSource } from '../../tools/index.js';
@ -27,6 +27,7 @@ export interface WorkUnitExecutionDeps {
connectionId: string;
jobId: string;
onStepFinish?: (info: { stepIndex: number; stepBudget: number }) => void;
onToolFailure?: (unitKey: string, failure: RunLoopToolFailure) => void | Promise<void>;
toolFailureCount?: (unitKey: string) => number;
}
@ -100,6 +101,7 @@ export async function executeWorkUnit(deps: WorkUnitExecutionDeps, wu: WorkUnit)
jobId: deps.jobId,
},
onStepFinish: deps.onStepFinish,
onToolFailure: deps.onToolFailure ? (failure) => deps.onToolFailure?.(wu.unitKey, failure) : undefined,
});
} catch (error) {
return failWithResetFromCurrentHead(error instanceof Error ? error.message : String(error));