mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-22 08:38:08 +02:00
fix: count claude sdk tool failures in work units
This commit is contained in:
parent
0ce798de68
commit
1c3436842f
4 changed files with 106 additions and 2 deletions
|
|
@ -912,6 +912,53 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
|
|||
);
|
||||
});
|
||||
|
||||
// Verifies that a tool failure reported through the runner's `onToolFailure` callback
// ends up in the persisted report: the unit is listed in `failedWorkUnits` and its
// transcript summary shows one call, one error, and the failing tool's name.
it('records SDK tool failures as fatal WorkUnit transcript failures', async () => {
|
||||
const deps = makeDeps();
|
||||
// Only the loop tagged 'ingest-bundle-wu' reports a failure; any other runLoop call
// just finishes naturally.
deps.agentRunner.runLoop.mockImplementation(async (params: any) => {
|
||||
if (params.telemetryTags.operationName === 'ingest-bundle-wu') {
|
||||
// Simulated failure payload: a numeric `path` input paired with a validation error message.
await params.onToolFailure?.({
|
||||
toolName: 'read_raw_span',
|
||||
input: { path: 42 },
|
||||
toolCallId: 'schema-1',
|
||||
error: 'Input validation failed: expected path to be a string',
|
||||
durationMs: 4,
|
||||
});
|
||||
}
|
||||
return { stopReason: 'natural' };
|
||||
});
|
||||
|
||||
const runner = buildRunner(deps);
|
||||
// Stub the staging helpers on the runner instance so the test supplies canned results.
(runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({
|
||||
currentHashes: new Map([['a.yml', 'h1']]),
|
||||
rawDirInWorktree: 'raw-sources/c1/fake/s',
|
||||
});
|
||||
(runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x');
|
||||
|
||||
await runner.run({
|
||||
jobId: 'j1',
|
||||
connectionId: 'c1',
|
||||
sourceKey: 'fake',
|
||||
trigger: 'upload',
|
||||
bundleRef: { kind: 'upload', uploadId: 'upload-x' },
|
||||
});
|
||||
|
||||
// NOTE(review): 'u1' is presumably the unit key produced by the makeDeps() fixtures — confirm.
expect(deps.reportsRepo.create).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
body: expect.objectContaining({
|
||||
failedWorkUnits: ['u1'],
|
||||
toolTranscripts: [
|
||||
expect.objectContaining({
|
||||
unitKey: 'u1',
|
||||
toolCallCount: 1,
|
||||
errorCount: 1,
|
||||
toolNames: ['read_raw_span'],
|
||||
}),
|
||||
],
|
||||
}),
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
it('persists WorkUnit unmapped fallback records in the report body', async () => {
|
||||
const deps = makeDeps();
|
||||
deps.agentRunner.runLoop.mockImplementation(async (params: any) => {
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@ import { mkdir, readFile, rm, writeFile } from 'node:fs/promises';
|
|||
import { dirname, join } from 'node:path';
|
||||
import pLimit from 'p-limit';
|
||||
import { z } from 'zod';
|
||||
import { createAgentTool, type AgentToolSet } from '../agent/index.js';
|
||||
import { createAgentTool, type AgentToolSet, type RunLoopToolFailure } from '../agent/index.js';
|
||||
import { type KtxLogger, noopLogger } from '../core/index.js';
|
||||
import type { CaptureSession, MemoryAction } from '../memory/index.js';
|
||||
import type { SemanticLayerService, SemanticLayerSource, SlValidationDeps } from '../sl/index.js';
|
||||
|
|
@ -401,14 +401,40 @@ export class IngestBundleRunner {
|
|||
}
|
||||
const transcriptDir = this.deps.storage.resolveTranscriptDir(job.jobId);
|
||||
const transcriptSummaries = new Map<string, MutableToolTranscriptSummary>();
|
||||
// Keys of tool errors that have already been written into a transcript summary.
// recordSdkToolFailure consults this set to avoid recording the same failed call twice.
const recordedToolErrorKeys = new Set<string>();
|
||||
// Builds the dedupe key `<wuKey>:<toolName>:<toolCallId>` for a transcript entry.
// Returns null when the entry has no error or no toolCallId, i.e. such entries are
// never tracked in recordedToolErrorKeys.
const transcriptErrorKey = (
|
||||
entry: Pick<ToolCallLogEntry, 'wuKey' | 'toolName' | 'toolCallId' | 'error'>,
|
||||
): string | null => (entry.error && entry.toolCallId ? `${entry.wuKey}:${entry.toolName}:${entry.toolCallId}` : null);
|
||||
// Curried recorder: given a transcript `path`, returns a callback that folds one
// ToolCallLogEntry into the per-work-unit summary held in `transcriptSummaries`.
// `path` is only used when a summary for the entry's wuKey has to be created.
const recordTranscriptEntry =
|
||||
(path: string) =>
|
||||
(entry: ToolCallLogEntry): void => {
|
||||
const errorKey = transcriptErrorKey(entry);
|
||||
// Remember keyed errors so a later SDK-level report of the same call can be skipped.
if (errorKey) {
|
||||
recordedToolErrorKeys.add(errorKey);
|
||||
}
|
||||
// Reuse the existing summary for this work unit, or start a fresh one.
const current =
|
||||
transcriptSummaries.get(entry.wuKey) ?? createMutableToolTranscriptSummary(entry.wuKey, path);
|
||||
recordToolTranscriptEntry(current, entry);
|
||||
transcriptSummaries.set(entry.wuKey, current);
|
||||
};
|
||||
// Curried adapter: given a transcript `path` and `unitKey`, returns a callback that
// converts a RunLoopToolFailure into a ToolCallLogEntry and records it through
// recordTranscriptEntry, unless an error with the same key was already recorded.
const recordSdkToolFailure =
|
||||
(path: string, unitKey: string) =>
|
||||
(failure: RunLoopToolFailure): void => {
|
||||
const entry: ToolCallLogEntry = {
|
||||
// Timestamp is taken when the failure is recorded, not read from the failure itself.
ts: new Date().toISOString(),
|
||||
wuKey: unitKey,
|
||||
// Only include toolCallId when the failure carries a truthy one.
...(failure.toolCallId ? { toolCallId: failure.toolCallId } : {}),
|
||||
toolName: failure.toolName,
|
||||
// A missing duration is stored as 0.
durationMs: failure.durationMs ?? 0,
|
||||
input: failure.input,
|
||||
error: { message: failure.error },
|
||||
};
|
||||
const errorKey = transcriptErrorKey(entry);
|
||||
// Skip failures already recorded under the same key. Failures without a toolCallId
// have a null key and are therefore always recorded.
if (errorKey && recordedToolErrorKeys.has(errorKey)) {
|
||||
return;
|
||||
}
|
||||
recordTranscriptEntry(path)(entry);
|
||||
};
|
||||
const overrideReport = await this.loadOverrideReport(job);
|
||||
|
||||
const stage1 = ctx?.startPhase(0.08);
|
||||
|
|
@ -779,6 +805,8 @@ export class IngestBundleRunner {
|
|||
sourceKey: job.sourceKey,
|
||||
connectionId: job.connectionId,
|
||||
jobId: job.jobId,
|
||||
onToolFailure: (unitKey, failure) =>
|
||||
recordSdkToolFailure(join(transcriptDir, `${unitKey}.jsonl`), unitKey)(failure),
|
||||
toolFailureCount: (unitKey) => transcriptSummaries.get(unitKey)?.fatalErrorCount ?? 0,
|
||||
onStepFinish: ({ stepIndex, stepBudget }) => {
|
||||
memoryFlow?.emit({ type: 'work_unit_step', unitKey: wu.unitKey, stepIndex, stepBudget });
|
||||
|
|
|
|||
|
|
@ -121,6 +121,33 @@ describe('Stage 3 — executeWorkUnit', () => {
|
|||
expect(deps.resetHardTo).toHaveBeenCalledWith('pre');
|
||||
});
|
||||
|
||||
// Verifies that executeWorkUnit passes a failure reported by the agent runner on to
// deps.onToolFailure, with a unit key prepended as the first argument and the failure
// payload forwarded unchanged.
it('forwards runner tool failures with the current WorkUnit key', async () => {
|
||||
const deps = makeDeps();
|
||||
const onToolFailure = vi.fn();
|
||||
deps.onToolFailure = onToolFailure;
|
||||
// Successive revParseHead calls resolve to 'pre' and then 'post'.
deps.sessionWorktreeGit.revParseHead = vi.fn().mockResolvedValueOnce('pre').mockResolvedValueOnce('post');
|
||||
// The mocked loop reports a single tool failure and then stops naturally.
deps.agentRunner.runLoop = vi.fn().mockImplementation(async (params: any) => {
|
||||
await params.onToolFailure?.({
|
||||
toolName: 'read_raw_span',
|
||||
input: { path: 42 },
|
||||
toolCallId: 'tool-1',
|
||||
error: 'Input validation failed',
|
||||
durationMs: 3,
|
||||
});
|
||||
return { stopReason: 'natural' };
|
||||
});
|
||||
|
||||
await executeWorkUnit(deps, makeWu());
|
||||
|
||||
// NOTE(review): 'u1' is presumably the unitKey of the work unit built by makeWu() — confirm.
expect(onToolFailure).toHaveBeenCalledWith('u1', {
|
||||
toolName: 'read_raw_span',
|
||||
input: { path: 42 },
|
||||
toolCallId: 'tool-1',
|
||||
error: 'Input validation failed',
|
||||
durationMs: 3,
|
||||
});
|
||||
});
|
||||
|
||||
it('runner loop thrown exception resets to the pre-WU SHA and marks WU failed', async () => {
|
||||
const deps = makeDeps();
|
||||
deps.sessionWorktreeGit.revParseHead = vi.fn().mockResolvedValueOnce('pre').mockResolvedValueOnce('post');
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
import type { AgentRunnerPort, AgentToolSet } from '@ktx/context/agent';
|
||||
import type { AgentRunnerPort, AgentToolSet, RunLoopToolFailure } from '@ktx/context/agent';
|
||||
import type { KtxModelRole } from '@ktx/llm';
|
||||
import type { CaptureSession, MemoryAction } from '../../memory/index.js';
|
||||
import { listTouchedSlSources, type TouchedSlSource } from '../../tools/index.js';
|
||||
|
|
@ -27,6 +27,7 @@ export interface WorkUnitExecutionDeps {
|
|||
connectionId: string;
|
||||
jobId: string;
|
||||
onStepFinish?: (info: { stepIndex: number; stepBudget: number }) => void;
|
||||
onToolFailure?: (unitKey: string, failure: RunLoopToolFailure) => void | Promise<void>;
|
||||
toolFailureCount?: (unitKey: string) => number;
|
||||
}
|
||||
|
||||
|
|
@ -100,6 +101,7 @@ export async function executeWorkUnit(deps: WorkUnitExecutionDeps, wu: WorkUnit)
|
|||
jobId: deps.jobId,
|
||||
},
|
||||
onStepFinish: deps.onStepFinish,
|
||||
onToolFailure: deps.onToolFailure ? (failure) => deps.onToolFailure?.(wu.unitKey, failure) : undefined,
|
||||
});
|
||||
} catch (error) {
|
||||
return failWithResetFromCurrentHead(error instanceof Error ? error.message : String(error));
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue