fix: count claude sdk tool failures in work units

2026-06-22 08:38:08 +02:00 · 2026-05-15 13:14:04 +02:00 · 2026-05-15 13:14:04 +02:00 · 1c3436842f
commit 1c3436842f
parent 0ce798de68
4 changed files with 106 additions and 2 deletions
--- a/packages/context/src/ingest/ingest-bundle.runner.test.ts
+++ b/packages/context/src/ingest/ingest-bundle.runner.test.ts
@ -912,6 +912,53 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
    );
  });

+  it('records SDK tool failures as fatal WorkUnit transcript failures', async () => { // End-to-end check: a failure reported through onToolFailure must surface in the persisted report body.
+    const deps = makeDeps();
+    deps.agentRunner.runLoop.mockImplementation(async (params: any) => {
+      if (params.telemetryTags.operationName === 'ingest-bundle-wu') { // Only loops tagged ingest-bundle-wu report a failure; every other loop just returns.
+        await params.onToolFailure?.({
+          toolName: 'read_raw_span',
+          input: { path: 42 }, // Numeric path paired with an error message that says a string was expected.
+          toolCallId: 'schema-1',
+          error: 'Input validation failed: expected path to be a string',
+          durationMs: 4,
+        });
+      }
+      return { stopReason: 'natural' };
+    });
+
+    const runner = buildRunner(deps);
+    (runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({ // Replaces the staging step on the runner instance with a canned result.
+      currentHashes: new Map([['a.yml', 'h1']]),
+      rawDirInWorktree: 'raw-sources/c1/fake/s',
+    });
+    (runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x'); // Replaces staged-dir resolution with a fixed path.
+
+    await runner.run({
+      jobId: 'j1',
+      connectionId: 'c1',
+      sourceKey: 'fake',
+      trigger: 'upload',
+      bundleRef: { kind: 'upload', uploadId: 'upload-x' },
+    });
+
+    expect(deps.reportsRepo.create).toHaveBeenCalledWith( // The created report must list the unit as failed and carry one transcript summary with one errored call.
+      expect.objectContaining({
+        body: expect.objectContaining({
+          failedWorkUnits: ['u1'], // NOTE(review): unit key u1 presumably comes from the makeDeps fixture; confirm.
+          toolTranscripts: [
+            expect.objectContaining({
+              unitKey: 'u1',
+              toolCallCount: 1,
+              errorCount: 1,
+              toolNames: ['read_raw_span'],
+            }),
+          ],
+        }),
+      }),
+    );
+  });
+
  it('persists WorkUnit unmapped fallback records in the report body', async () => {
    const deps = makeDeps();
    deps.agentRunner.runLoop.mockImplementation(async (params: any) => {
--- a/packages/context/src/ingest/ingest-bundle.runner.ts
+++ b/packages/context/src/ingest/ingest-bundle.runner.ts
@ -2,7 +2,7 @@ import { mkdir, readFile, rm, writeFile } from 'node:fs/promises';
 import { dirname, join } from 'node:path';
 import pLimit from 'p-limit';
 import { z } from 'zod';
-import { createAgentTool, type AgentToolSet } from '../agent/index.js';
+import { createAgentTool, type AgentToolSet, type RunLoopToolFailure } from '../agent/index.js';
 import { type KtxLogger, noopLogger } from '../core/index.js';
 import type { CaptureSession, MemoryAction } from '../memory/index.js';
 import type { SemanticLayerService, SemanticLayerSource, SlValidationDeps } from '../sl/index.js';
@ -401,14 +401,40 @@ export class IngestBundleRunner {
    }
    const transcriptDir = this.deps.storage.resolveTranscriptDir(job.jobId);
    const transcriptSummaries = new Map<string, MutableToolTranscriptSummary>();
+    const recordedToolErrorKeys = new Set<string>(); // Keys of errored tool calls already recorded; consulted to skip duplicate SDK failure reports.
+    const transcriptErrorKey = ( // Builds the dedupe key wuKey:toolName:toolCallId for an entry.
+      entry: Pick<ToolCallLogEntry, 'wuKey' | 'toolName' | 'toolCallId' | 'error'>,
+    ): string | null => (entry.error && entry.toolCallId ? `${entry.wuKey}:${entry.toolName}:${entry.toolCallId}` : null); // Null unless the entry has both an error and a toolCallId.
    const recordTranscriptEntry =
      (path: string) =>
      (entry: ToolCallLogEntry): void => {
+        const errorKey = transcriptErrorKey(entry); // Null for entries without an error or without a toolCallId.
+        if (errorKey) {
+          recordedToolErrorKeys.add(errorKey); // Remember this errored call so a later SDK failure report with the same key is not recorded twice.
+        }
        const current =
          transcriptSummaries.get(entry.wuKey) ?? createMutableToolTranscriptSummary(entry.wuKey, path);
        recordToolTranscriptEntry(current, entry);
        transcriptSummaries.set(entry.wuKey, current);
      };
+    const recordSdkToolFailure = // Turns a tool failure reported by the run loop into a transcript entry for the given WorkUnit.
+      (path: string, unitKey: string) =>
+      (failure: RunLoopToolFailure): void => {
+        const entry: ToolCallLogEntry = {
+          ts: new Date().toISOString(), // Timestamp is taken when the failure is recorded here.
+          wuKey: unitKey,
+          ...(failure.toolCallId ? { toolCallId: failure.toolCallId } : {}), // toolCallId is only set when the failure carries a truthy one.
+          toolName: failure.toolName,
+          durationMs: failure.durationMs ?? 0, // A missing duration defaults to 0.
+          input: failure.input,
+          error: { message: failure.error }, // The failure error value becomes the message of the entry error object.
+        };
+        const errorKey = transcriptErrorKey(entry);
+        if (errorKey && recordedToolErrorKeys.has(errorKey)) { // Same unit, tool and call id already recorded as an error: skip to avoid double counting.
+          return;
+        }
+        recordTranscriptEntry(path)(entry); // Failures without a toolCallId have no key and are therefore always recorded.
+      };
    const overrideReport = await this.loadOverrideReport(job);

    const stage1 = ctx?.startPhase(0.08);
@ -779,6 +805,8 @@ export class IngestBundleRunner {
              sourceKey: job.sourceKey,
              connectionId: job.connectionId,
              jobId: job.jobId,
+              onToolFailure: (unitKey, failure) => // Records the failure under the unit, using transcriptDir/<unitKey>.jsonl as the transcript path.
+                recordSdkToolFailure(join(transcriptDir, `${unitKey}.jsonl`), unitKey)(failure),
              toolFailureCount: (unitKey) => transcriptSummaries.get(unitKey)?.fatalErrorCount ?? 0,
              onStepFinish: ({ stepIndex, stepBudget }) => {
                memoryFlow?.emit({ type: 'work_unit_step', unitKey: wu.unitKey, stepIndex, stepBudget });
--- a/packages/context/src/ingest/stages/stage-3-work-units.test.ts
+++ b/packages/context/src/ingest/stages/stage-3-work-units.test.ts
@ -121,6 +121,33 @@ describe('Stage 3 — executeWorkUnit', () => {
    expect(deps.resetHardTo).toHaveBeenCalledWith('pre');
  });

+  it('forwards runner tool failures with the current WorkUnit key', async () => { // Checks that executeWorkUnit adds the unit key when relaying a runner-reported failure.
+    const deps = makeDeps();
+    const onToolFailure = vi.fn();
+    deps.onToolFailure = onToolFailure; // Spy installed as the deps-level failure callback.
+    deps.sessionWorktreeGit.revParseHead = vi.fn().mockResolvedValueOnce('pre').mockResolvedValueOnce('post');
+    deps.agentRunner.runLoop = vi.fn().mockImplementation(async (params: any) => {
+      await params.onToolFailure?.({ // The run loop callback is called with the failure payload only, no unit key.
+        toolName: 'read_raw_span',
+        input: { path: 42 },
+        toolCallId: 'tool-1',
+        error: 'Input validation failed',
+        durationMs: 3,
+      });
+      return { stopReason: 'natural' };
+    });
+
+    await executeWorkUnit(deps, makeWu());
+
+    expect(onToolFailure).toHaveBeenCalledWith('u1', { // NOTE(review): u1 is presumably the unitKey produced by makeWu; confirm. The payload must arrive unchanged.
+      toolName: 'read_raw_span',
+      input: { path: 42 },
+      toolCallId: 'tool-1',
+      error: 'Input validation failed',
+      durationMs: 3,
+    });
+  });
+
  it('runner loop thrown exception resets to the pre-WU SHA and marks WU failed', async () => {
    const deps = makeDeps();
    deps.sessionWorktreeGit.revParseHead = vi.fn().mockResolvedValueOnce('pre').mockResolvedValueOnce('post');
--- a/packages/context/src/ingest/stages/stage-3-work-units.ts
+++ b/packages/context/src/ingest/stages/stage-3-work-units.ts
@ -1,4 +1,4 @@
-import type { AgentRunnerPort, AgentToolSet } from '@ktx/context/agent';
+import type { AgentRunnerPort, AgentToolSet, RunLoopToolFailure } from '@ktx/context/agent';
 import type { KtxModelRole } from '@ktx/llm';
 import type { CaptureSession, MemoryAction } from '../../memory/index.js';
 import { listTouchedSlSources, type TouchedSlSource } from '../../tools/index.js';
@ -27,6 +27,7 @@ export interface WorkUnitExecutionDeps {
  connectionId: string;
  jobId: string;
  onStepFinish?: (info: { stepIndex: number; stepBudget: number }) => void;
+  onToolFailure?: (unitKey: string, failure: RunLoopToolFailure) => void | Promise<void>; // Optional hook called with the WorkUnit key and the reported tool failure; may be sync or async.
  toolFailureCount?: (unitKey: string) => number;
 }

@ -100,6 +101,7 @@ export async function executeWorkUnit(deps: WorkUnitExecutionDeps, wu: WorkUnit)
        jobId: deps.jobId,
      },
      onStepFinish: deps.onStepFinish,
+      onToolFailure: deps.onToolFailure ? (failure) => deps.onToolFailure?.(wu.unitKey, failure) : undefined, // Binds the current WorkUnit key; passes undefined when no deps-level hook is configured.
    });
  } catch (error) {
    return failWithResetFromCurrentHead(error instanceof Error ? error.message : String(error));