From 1c3436842f2da02861651590f89f8acf8c1b824f Mon Sep 17 00:00:00 2001
From: Andrey Avtomonov <andreybavt@gmail.com>
Date: Fri, 15 May 2026 13:14:04 +0200
Subject: [PATCH] fix: count claude sdk tool failures in work units

---
 .../src/ingest/ingest-bundle.runner.test.ts   | 47 +++++++++++++++++++
 .../src/ingest/ingest-bundle.runner.ts        | 30 +++++++++++-
 .../ingest/stages/stage-3-work-units.test.ts  | 27 +++++++++++
 .../src/ingest/stages/stage-3-work-units.ts   |  4 +-
 4 files changed, 106 insertions(+), 2 deletions(-)
diff --git a/packages/context/src/ingest/ingest-bundle.runner.test.ts b/packages/context/src/ingest/ingest-bundle.runner.test.ts
index 348eb591..abfd7ece 100644
--- a/packages/context/src/ingest/ingest-bundle.runner.test.ts
+++ b/packages/context/src/ingest/ingest-bundle.runner.test.ts
@@ -912,6 +912,53 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
     );
   });
 
+  it('records SDK tool failures as fatal WorkUnit transcript failures', async () => {
+    const deps = makeDeps(); // runLoop is stubbed below to report one tool failure, only when operationName is 'ingest-bundle-wu'
+    deps.agentRunner.runLoop.mockImplementation(async (params: any) => {
+      if (params.telemetryTags.operationName === 'ingest-bundle-wu') {
+        await params.onToolFailure?.({
+          toolName: 'read_raw_span',
+          input: { path: 42 },
+          toolCallId: 'schema-1',
+          error: 'Input validation failed: expected path to be a string',
+          durationMs: 4,
+        });
+      }
+      return { stopReason: 'natural' };
+    });
+    // Replace stage 1 and staged-dir resolution on the runner instance with stubs.
+    const runner = buildRunner(deps);
+    (runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({
+      currentHashes: new Map([['a.yml', 'h1']]),
+      rawDirInWorktree: 'raw-sources/c1/fake/s',
+    });
+    (runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x');
+    // Run an upload-triggered job through the runner.
+    await runner.run({
+      jobId: 'j1',
+      connectionId: 'c1',
+      sourceKey: 'fake',
+      trigger: 'upload',
+      bundleRef: { kind: 'upload', uploadId: 'upload-x' },
+    });
+    // The report body must list u1 as failed, with one tool call and one error for read_raw_span.
+    expect(deps.reportsRepo.create).toHaveBeenCalledWith(
+      expect.objectContaining({
+        body: expect.objectContaining({
+          failedWorkUnits: ['u1'],
+          toolTranscripts: [
+            expect.objectContaining({
+              unitKey: 'u1',
+              toolCallCount: 1,
+              errorCount: 1,
+              toolNames: ['read_raw_span'],
+            }),
+          ],
+        }),
+      }),
+    );
+  });
+
   it('persists WorkUnit unmapped fallback records in the report body', async () => {
     const deps = makeDeps();
     deps.agentRunner.runLoop.mockImplementation(async (params: any) => {
diff --git a/packages/context/src/ingest/ingest-bundle.runner.ts b/packages/context/src/ingest/ingest-bundle.runner.ts
index 52fd33f0..67afdce8 100644
--- a/packages/context/src/ingest/ingest-bundle.runner.ts
+++ b/packages/context/src/ingest/ingest-bundle.runner.ts
@@ -2,7 +2,7 @@ import { mkdir, readFile, rm, writeFile } from 'node:fs/promises';
 import { dirname, join } from 'node:path';
 import pLimit from 'p-limit';
 import { z } from 'zod';
-import { createAgentTool, type AgentToolSet } from '../agent/index.js';
+import { createAgentTool, type AgentToolSet, type RunLoopToolFailure } from '../agent/index.js';
 import { type KtxLogger, noopLogger } from '../core/index.js';
 import type { CaptureSession, MemoryAction } from '../memory/index.js';
 import type { SemanticLayerService, SemanticLayerSource, SlValidationDeps } from '../sl/index.js';
@@ -401,14 +401,40 @@ export class IngestBundleRunner {
     }
     const transcriptDir = this.deps.storage.resolveTranscriptDir(job.jobId);
     const transcriptSummaries = new Map<string, MutableToolTranscriptSummary>();
+    const recordedToolErrorKeys = new Set<string>(); // keys (from transcriptErrorKey) of error entries that were already recorded
+    const transcriptErrorKey = (
+      entry: Pick<ToolCallLogEntry, 'wuKey' | 'toolName' | 'toolCallId' | 'error'>,
+    ): string | null => (entry.error && entry.toolCallId ? `${entry.wuKey}:${entry.toolName}:${entry.toolCallId}` : null); // null unless the entry has both an error and a toolCallId
     const recordTranscriptEntry =
       (path: string) =>
       (entry: ToolCallLogEntry): void => {
+        const errorKey = transcriptErrorKey(entry); // null unless the entry has both an error and a toolCallId
+        if (errorKey) {
+          recordedToolErrorKeys.add(errorKey); // lets recordSdkToolFailure skip a later SDK report of this same failure
+        }
         const current =
           transcriptSummaries.get(entry.wuKey) ?? createMutableToolTranscriptSummary(entry.wuKey, path);
         recordToolTranscriptEntry(current, entry);
         transcriptSummaries.set(entry.wuKey, current);
       };
+    const recordSdkToolFailure =
+      (path: string, unitKey: string) =>
+      (failure: RunLoopToolFailure): void => {
+        const entry: ToolCallLogEntry = {
+          ts: new Date().toISOString(), // stamped when the failure is reported here, not taken from the failure itself
+          wuKey: unitKey,
+          ...(failure.toolCallId ? { toolCallId: failure.toolCallId } : {}), // omit the property entirely when there is no call id
+          toolName: failure.toolName,
+          durationMs: failure.durationMs ?? 0, // fall back to 0 when the failure carries no duration
+          input: failure.input,
+          error: { message: failure.error }, // failure.error becomes the entry's error message
+        };
+        const errorKey = transcriptErrorKey(entry);
+        if (errorKey && recordedToolErrorKeys.has(errorKey)) {
+          return; // an error with the same wuKey/toolName/toolCallId was already recorded; skip it so it is not recorded twice
+        }
+        recordTranscriptEntry(path)(entry); // NOTE(review): a transcript entry arriving after this one is not deduplicated — confirm ordering
+      };
     const overrideReport = await this.loadOverrideReport(job);
 
     const stage1 = ctx?.startPhase(0.08);
@@ -779,6 +805,8 @@ export class IngestBundleRunner {
               sourceKey: job.sourceKey,
               connectionId: job.connectionId,
               jobId: job.jobId,
+              onToolFailure: (unitKey, failure) =>
+                recordSdkToolFailure(join(transcriptDir, `${unitKey}.jsonl`), unitKey)(failure),
               toolFailureCount: (unitKey) => transcriptSummaries.get(unitKey)?.fatalErrorCount ?? 0,
               onStepFinish: ({ stepIndex, stepBudget }) => {
                 memoryFlow?.emit({ type: 'work_unit_step', unitKey: wu.unitKey, stepIndex, stepBudget });
diff --git a/packages/context/src/ingest/stages/stage-3-work-units.test.ts b/packages/context/src/ingest/stages/stage-3-work-units.test.ts
index 23ec3fa8..c059d093 100644
--- a/packages/context/src/ingest/stages/stage-3-work-units.test.ts
+++ b/packages/context/src/ingest/stages/stage-3-work-units.test.ts
@@ -121,6 +121,33 @@ describe('Stage 3 — executeWorkUnit', () => {
     expect(deps.resetHardTo).toHaveBeenCalledWith('pre');
   });
 
+  it('forwards runner tool failures with the current WorkUnit key', async () => {
+    const deps = makeDeps();
+    const onToolFailure = vi.fn();
+    deps.onToolFailure = onToolFailure; // spy that executeWorkUnit is expected to call with (unitKey, failure)
+    deps.sessionWorktreeGit.revParseHead = vi.fn().mockResolvedValueOnce('pre').mockResolvedValueOnce('post');
+    deps.agentRunner.runLoop = vi.fn().mockImplementation(async (params: any) => {
+      await params.onToolFailure?.({
+        toolName: 'read_raw_span',
+        input: { path: 42 },
+        toolCallId: 'tool-1',
+        error: 'Input validation failed',
+        durationMs: 3,
+      });
+      return { stopReason: 'natural' };
+    });
+    // Execute the WorkUnit against the stubbed loop, which reports a single tool failure.
+    await executeWorkUnit(deps, makeWu());
+    // The callback must receive the WorkUnit key first, followed by the unmodified failure payload.
+    expect(onToolFailure).toHaveBeenCalledWith('u1', {
+      toolName: 'read_raw_span',
+      input: { path: 42 },
+      toolCallId: 'tool-1',
+      error: 'Input validation failed',
+      durationMs: 3,
+    });
+  });
+
   it('runner loop thrown exception resets to the pre-WU SHA and marks WU failed', async () => {
     const deps = makeDeps();
     deps.sessionWorktreeGit.revParseHead = vi.fn().mockResolvedValueOnce('pre').mockResolvedValueOnce('post');
diff --git a/packages/context/src/ingest/stages/stage-3-work-units.ts b/packages/context/src/ingest/stages/stage-3-work-units.ts
index 28c88829..e73116af 100644
--- a/packages/context/src/ingest/stages/stage-3-work-units.ts
+++ b/packages/context/src/ingest/stages/stage-3-work-units.ts
@@ -1,4 +1,4 @@
-import type { AgentRunnerPort, AgentToolSet } from '@ktx/context/agent';
+import type { AgentRunnerPort, AgentToolSet, RunLoopToolFailure } from '@ktx/context/agent';
 import type { KtxModelRole } from '@ktx/llm';
 import type { CaptureSession, MemoryAction } from '../../memory/index.js';
 import { listTouchedSlSources, type TouchedSlSource } from '../../tools/index.js';
@@ -27,6 +27,7 @@ export interface WorkUnitExecutionDeps {
   connectionId: string;
   jobId: string;
   onStepFinish?: (info: { stepIndex: number; stepBudget: number }) => void;
+  onToolFailure?: (unitKey: string, failure: RunLoopToolFailure) => void | Promise<void>;
   toolFailureCount?: (unitKey: string) => number;
 }
 
@@ -100,6 +101,7 @@ export async function executeWorkUnit(deps: WorkUnitExecutionDeps, wu: WorkUnit)
         jobId: deps.jobId,
       },
       onStepFinish: deps.onStepFinish,
+      onToolFailure: deps.onToolFailure ? (failure) => deps.onToolFailure?.(wu.unitKey, failure) : undefined,
     });
   } catch (error) {
     return failWithResetFromCurrentHead(error instanceof Error ? error.message : String(error));