From 1c3436842f2da02861651590f89f8acf8c1b824f Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov Date: Fri, 15 May 2026 13:14:04 +0200 Subject: [PATCH] fix: count claude sdk tool failures in work units --- .../src/ingest/ingest-bundle.runner.test.ts | 47 +++++++++++++++++++ .../src/ingest/ingest-bundle.runner.ts | 30 +++++++++++- .../ingest/stages/stage-3-work-units.test.ts | 27 +++++++++++ .../src/ingest/stages/stage-3-work-units.ts | 4 +- 4 files changed, 106 insertions(+), 2 deletions(-) diff --git a/packages/context/src/ingest/ingest-bundle.runner.test.ts b/packages/context/src/ingest/ingest-bundle.runner.test.ts index 348eb591..abfd7ece 100644 --- a/packages/context/src/ingest/ingest-bundle.runner.test.ts +++ b/packages/context/src/ingest/ingest-bundle.runner.test.ts @@ -912,6 +912,53 @@ describe('IngestBundleRunner — Stages 1 → 7', () => { ); }); + it('records SDK tool failures as fatal WorkUnit transcript failures', async () => { + const deps = makeDeps(); + deps.agentRunner.runLoop.mockImplementation(async (params: any) => { + if (params.telemetryTags.operationName === 'ingest-bundle-wu') { + await params.onToolFailure?.({ + toolName: 'read_raw_span', + input: { path: 42 }, + toolCallId: 'schema-1', + error: 'Input validation failed: expected path to be a string', + durationMs: 4, + }); + } + return { stopReason: 'natural' }; + }); + + const runner = buildRunner(deps); + (runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({ + currentHashes: new Map([['a.yml', 'h1']]), + rawDirInWorktree: 'raw-sources/c1/fake/s', + }); + (runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x'); + + await runner.run({ + jobId: 'j1', + connectionId: 'c1', + sourceKey: 'fake', + trigger: 'upload', + bundleRef: { kind: 'upload', uploadId: 'upload-x' }, + }); + + expect(deps.reportsRepo.create).toHaveBeenCalledWith( + expect.objectContaining({ + body: expect.objectContaining({ + failedWorkUnits: ['u1'], + toolTranscripts: [ + 
expect.objectContaining({ + unitKey: 'u1', + toolCallCount: 1, + errorCount: 1, + toolNames: ['read_raw_span'], + }), + ], + }), + }), + ); + }); + it('persists WorkUnit unmapped fallback records in the report body', async () => { const deps = makeDeps(); deps.agentRunner.runLoop.mockImplementation(async (params: any) => { diff --git a/packages/context/src/ingest/ingest-bundle.runner.ts b/packages/context/src/ingest/ingest-bundle.runner.ts index 52fd33f0..67afdce8 100644 --- a/packages/context/src/ingest/ingest-bundle.runner.ts +++ b/packages/context/src/ingest/ingest-bundle.runner.ts @@ -2,7 +2,7 @@ import { mkdir, readFile, rm, writeFile } from 'node:fs/promises'; import { dirname, join } from 'node:path'; import pLimit from 'p-limit'; import { z } from 'zod'; -import { createAgentTool, type AgentToolSet } from '../agent/index.js'; +import { createAgentTool, type AgentToolSet, type RunLoopToolFailure } from '../agent/index.js'; import { type KtxLogger, noopLogger } from '../core/index.js'; import type { CaptureSession, MemoryAction } from '../memory/index.js'; import type { SemanticLayerService, SemanticLayerSource, SlValidationDeps } from '../sl/index.js'; @@ -401,14 +401,40 @@ export class IngestBundleRunner { } const transcriptDir = this.deps.storage.resolveTranscriptDir(job.jobId); const transcriptSummaries = new Map(); + const recordedToolErrorKeys = new Set<string>(); + const transcriptErrorKey = ( + entry: Pick<ToolCallLogEntry, 'error' | 'toolCallId' | 'toolName' | 'wuKey'>, + ): string | null => (entry.error && entry.toolCallId ? `${entry.wuKey}:${entry.toolName}:${entry.toolCallId}` : null); const recordTranscriptEntry = (path: string) => (entry: ToolCallLogEntry): void => { + const errorKey = transcriptErrorKey(entry); + if (errorKey) { + recordedToolErrorKeys.add(errorKey); + } const current = transcriptSummaries.get(entry.wuKey) ?? 
createMutableToolTranscriptSummary(entry.wuKey, path); recordToolTranscriptEntry(current, entry); transcriptSummaries.set(entry.wuKey, current); }; + const recordSdkToolFailure = + (path: string, unitKey: string) => + (failure: RunLoopToolFailure): void => { + const entry: ToolCallLogEntry = { + ts: new Date().toISOString(), + wuKey: unitKey, + ...(failure.toolCallId ? { toolCallId: failure.toolCallId } : {}), + toolName: failure.toolName, + durationMs: failure.durationMs ?? 0, + input: failure.input, + error: { message: failure.error }, + }; + const errorKey = transcriptErrorKey(entry); + if (errorKey && recordedToolErrorKeys.has(errorKey)) { + return; + } + recordTranscriptEntry(path)(entry); + }; const overrideReport = await this.loadOverrideReport(job); const stage1 = ctx?.startPhase(0.08); @@ -779,6 +805,8 @@ export class IngestBundleRunner { sourceKey: job.sourceKey, connectionId: job.connectionId, jobId: job.jobId, + onToolFailure: (unitKey, failure) => + recordSdkToolFailure(join(transcriptDir, `${unitKey}.jsonl`), unitKey)(failure), toolFailureCount: (unitKey) => transcriptSummaries.get(unitKey)?.fatalErrorCount ?? 
0, onStepFinish: ({ stepIndex, stepBudget }) => { memoryFlow?.emit({ type: 'work_unit_step', unitKey: wu.unitKey, stepIndex, stepBudget }); diff --git a/packages/context/src/ingest/stages/stage-3-work-units.test.ts b/packages/context/src/ingest/stages/stage-3-work-units.test.ts index 23ec3fa8..c059d093 100644 --- a/packages/context/src/ingest/stages/stage-3-work-units.test.ts +++ b/packages/context/src/ingest/stages/stage-3-work-units.test.ts @@ -121,6 +121,33 @@ describe('Stage 3 — executeWorkUnit', () => { expect(deps.resetHardTo).toHaveBeenCalledWith('pre'); }); + it('forwards runner tool failures with the current WorkUnit key', async () => { + const deps = makeDeps(); + const onToolFailure = vi.fn(); + deps.onToolFailure = onToolFailure; + deps.sessionWorktreeGit.revParseHead = vi.fn().mockResolvedValueOnce('pre').mockResolvedValueOnce('post'); + deps.agentRunner.runLoop = vi.fn().mockImplementation(async (params: any) => { + await params.onToolFailure?.({ + toolName: 'read_raw_span', + input: { path: 42 }, + toolCallId: 'tool-1', + error: 'Input validation failed', + durationMs: 3, + }); + return { stopReason: 'natural' }; + }); + + await executeWorkUnit(deps, makeWu()); + + expect(onToolFailure).toHaveBeenCalledWith('u1', { + toolName: 'read_raw_span', + input: { path: 42 }, + toolCallId: 'tool-1', + error: 'Input validation failed', + durationMs: 3, + }); + }); + it('runner loop thrown exception resets to the pre-WU SHA and marks WU failed', async () => { const deps = makeDeps(); deps.sessionWorktreeGit.revParseHead = vi.fn().mockResolvedValueOnce('pre').mockResolvedValueOnce('post'); diff --git a/packages/context/src/ingest/stages/stage-3-work-units.ts b/packages/context/src/ingest/stages/stage-3-work-units.ts index 28c88829..e73116af 100644 --- a/packages/context/src/ingest/stages/stage-3-work-units.ts +++ b/packages/context/src/ingest/stages/stage-3-work-units.ts @@ -1,4 +1,4 @@ -import type { AgentRunnerPort, AgentToolSet } from '@ktx/context/agent'; 
+import type { AgentRunnerPort, AgentToolSet, RunLoopToolFailure } from '@ktx/context/agent'; import type { KtxModelRole } from '@ktx/llm'; import type { CaptureSession, MemoryAction } from '../../memory/index.js'; import { listTouchedSlSources, type TouchedSlSource } from '../../tools/index.js'; @@ -27,6 +27,7 @@ export interface WorkUnitExecutionDeps { connectionId: string; jobId: string; onStepFinish?: (info: { stepIndex: number; stepBudget: number }) => void; + onToolFailure?: (unitKey: string, failure: RunLoopToolFailure) => void | Promise<void>; toolFailureCount?: (unitKey: string) => number; } @@ -100,6 +101,7 @@ export async function executeWorkUnit(deps: WorkUnitExecutionDeps, wu: WorkUnit) jobId: deps.jobId, }, onStepFinish: deps.onStepFinish, + onToolFailure: deps.onToolFailure ? (failure) => deps.onToolFailure?.(wu.unitKey, failure) : undefined, }); } catch (error) { return failWithResetFromCurrentHead(error instanceof Error ? error.message : String(error));