mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-16 08:25:14 +02:00
fix: drive codex loop metrics from mcp events
This commit is contained in:
parent
1430ca49eb
commit
f27fc9c9a5
4 changed files with 132 additions and 49 deletions
|
|
@ -81,7 +81,8 @@ export function summarizeCodexExecEvents(
|
|||
let finalText = '';
|
||||
let stopReason: RunLoopStopReason = 'natural';
|
||||
let usage: LlmTokenUsage = {};
|
||||
let stepCount = 0;
|
||||
let turnCount = 0;
|
||||
let completedToolStepCount = 0;
|
||||
const stepBoundariesMs: number[] = [];
|
||||
let toolCallCount = 0;
|
||||
const toolFailures: string[] = [];
|
||||
|
|
@ -95,13 +96,38 @@ export function summarizeCodexExecEvents(
|
|||
}
|
||||
|
||||
if (eventType === 'turn.started') {
|
||||
stepCount += 1;
|
||||
turnCount += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
const item = record(eventRecord.item);
|
||||
const itemType = text(item?.type);
|
||||
|
||||
if (eventType === 'item.started' && itemType === 'mcp_tool_call') {
|
||||
toolCallCount += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (eventType === 'item.completed' && itemType === 'mcp_tool_call') {
|
||||
completedToolStepCount += 1;
|
||||
stepBoundariesMs.push(now() - startedAt);
|
||||
if (item?.error !== undefined || item?.status === 'failed') {
|
||||
const name = text(item.name) ?? text(item.tool) ?? text(item.tool_name) ?? 'unknown';
|
||||
toolFailures.push(`${name}: ${errorMessageFrom(item.error)}`);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (eventType === 'item.completed' && itemType === 'agent_message') {
|
||||
finalText = text(item?.text) ?? finalText;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (eventType === 'turn.completed') {
|
||||
usage = usageFrom(eventRecord.usage);
|
||||
stepBoundariesMs.push(now() - startedAt);
|
||||
if (completedToolStepCount === 0) {
|
||||
stepBoundariesMs.push(now() - startedAt);
|
||||
}
|
||||
stopReason = stopReasonFrom(eventRecord.reason ?? eventRecord.stop_reason ?? eventRecord.terminal_reason);
|
||||
continue;
|
||||
}
|
||||
|
|
@ -109,28 +135,6 @@ export function summarizeCodexExecEvents(
|
|||
if (eventType === 'turn.failed' || eventType === 'error') {
|
||||
stopReason = 'error';
|
||||
error = new Error(errorMessageFrom(eventRecord.error ?? eventRecord.message));
|
||||
continue;
|
||||
}
|
||||
|
||||
const item = record(eventRecord.item);
|
||||
const itemType = text(item?.type);
|
||||
if (!item || !itemType) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (eventType === 'item.completed' && itemType === 'agent_message') {
|
||||
finalText = text(item.text) ?? finalText;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (eventType === 'item.started' && itemType === 'mcp_tool_call') {
|
||||
toolCallCount += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (eventType === 'item.completed' && itemType === 'mcp_tool_call' && item.error !== undefined) {
|
||||
const name = text(item.name) ?? text(item.tool) ?? text(item.tool_name) ?? 'unknown';
|
||||
toolFailures.push(`${name}: ${errorMessageFrom(item.error)}`);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -138,7 +142,7 @@ export function summarizeCodexExecEvents(
|
|||
finalText,
|
||||
stopReason,
|
||||
usage,
|
||||
stepCount,
|
||||
stepCount: completedToolStepCount > 0 ? completedToolStepCount : turnCount,
|
||||
stepBoundariesMs,
|
||||
toolCallCount,
|
||||
toolFailures,
|
||||
|
|
|
|||
|
|
@ -46,9 +46,20 @@ function metrics(summary: CodexExecEventSummary, startedAt: number): { totalMs:
|
|||
return { totalMs: Date.now() - startedAt, usage: summary.usage };
|
||||
}
|
||||
|
||||
function assertSuccessfulText(summary: CodexExecEventSummary): string {
|
||||
function summaryError(summary: CodexExecEventSummary): Error | undefined {
|
||||
if (summary.error) {
|
||||
throw summary.error;
|
||||
return summary.error;
|
||||
}
|
||||
if (summary.toolFailures.length > 0) {
|
||||
return new Error(`Codex runtime tool call failed: ${summary.toolFailures.join('; ')}`);
|
||||
}
|
||||
return undefined;
|
||||
}
|
||||
|
||||
function assertSuccessfulText(summary: CodexExecEventSummary): string {
|
||||
const error = summaryError(summary);
|
||||
if (error) {
|
||||
throw error;
|
||||
}
|
||||
if (!summary.finalText.trim()) {
|
||||
throw new Error('Codex completed without an agent message');
|
||||
|
|
@ -215,9 +226,11 @@ export class CodexKtxLlmRuntime implements KtxLlmRuntimePort {
|
|||
);
|
||||
}
|
||||
}
|
||||
const error = summaryError(summary);
|
||||
const stopReason = error ? 'error' : summary.stopReason;
|
||||
return {
|
||||
stopReason: summary.stopReason,
|
||||
...(summary.stopReason === 'error' && summary.error ? { error: summary.error } : {}),
|
||||
stopReason,
|
||||
...(stopReason === 'error' && error ? { error } : {}),
|
||||
metrics: {
|
||||
totalMs: Date.now() - startedAt,
|
||||
usage: summary.usage,
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ import {
|
|||
} from '../../../src/context/llm/codex-exec-events.js';
|
||||
|
||||
describe('Codex exec event parsing', () => {
|
||||
it('captures final agent text, SDK usage, steps, and natural completion', () => {
|
||||
it('uses the completed turn as one step when no MCP tools run', () => {
|
||||
const summary = summarizeCodexExecEvents(
|
||||
[
|
||||
{ type: 'thread.started', thread_id: 'thr_1' },
|
||||
|
|
@ -35,6 +35,52 @@ describe('Codex exec event parsing', () => {
|
|||
});
|
||||
});
|
||||
|
||||
it('uses completed MCP tool calls as loop steps', () => {
|
||||
const offsets = [115, 140, 175];
|
||||
const summary = summarizeCodexExecEvents(
|
||||
[
|
||||
{ type: 'turn.started' },
|
||||
{
|
||||
type: 'item.started',
|
||||
item: { id: 'call_1', type: 'mcp_tool_call', server: 'ktx', tool: 'search', arguments: {}, status: 'in_progress' },
|
||||
},
|
||||
{
|
||||
type: 'item.completed',
|
||||
item: { id: 'call_1', type: 'mcp_tool_call', server: 'ktx', tool: 'search', arguments: {}, status: 'completed' },
|
||||
},
|
||||
{
|
||||
type: 'item.started',
|
||||
item: { id: 'call_2', type: 'mcp_tool_call', server: 'ktx', tool: 'lookup', arguments: {}, status: 'in_progress' },
|
||||
},
|
||||
{
|
||||
type: 'item.completed',
|
||||
item: {
|
||||
id: 'call_2',
|
||||
type: 'mcp_tool_call',
|
||||
server: 'ktx',
|
||||
tool: 'lookup',
|
||||
arguments: {},
|
||||
status: 'failed',
|
||||
error: { message: 'denied' },
|
||||
},
|
||||
},
|
||||
{ type: 'item.completed', item: { id: 'item_1', type: 'agent_message', text: 'done' } },
|
||||
{ type: 'turn.completed', usage: { input_tokens: 1, output_tokens: 1, cached_input_tokens: 0, reasoning_output_tokens: 0 } },
|
||||
],
|
||||
{ startedAt: 100, now: () => offsets.shift() ?? 175 },
|
||||
);
|
||||
|
||||
expect(summary).toEqual({
|
||||
finalText: 'done',
|
||||
stopReason: 'natural',
|
||||
usage: { inputTokens: 1, outputTokens: 1, totalTokens: 2 },
|
||||
stepCount: 2,
|
||||
stepBoundariesMs: [15, 40],
|
||||
toolCallCount: 2,
|
||||
toolFailures: ['lookup: denied'],
|
||||
});
|
||||
});
|
||||
|
||||
it('maps turn failures into error stop reason', () => {
|
||||
const summary = summarizeCodexExecEvents([
|
||||
{ type: 'turn.started' },
|
||||
|
|
@ -54,24 +100,6 @@ describe('Codex exec event parsing', () => {
|
|||
expect(summary.stopReason).toBe('budget');
|
||||
});
|
||||
|
||||
it('counts SDK-shaped MCP tool calls and failed MCP tool calls', () => {
|
||||
const summary = summarizeCodexExecEvents([
|
||||
{ type: 'turn.started' },
|
||||
{
|
||||
type: 'item.started',
|
||||
item: { id: 'call_1', type: 'mcp_tool_call', server: 'ktx', tool: 'search', arguments: { query: 'revenue' }, status: 'in_progress' },
|
||||
},
|
||||
{
|
||||
type: 'item.completed',
|
||||
item: { id: 'call_1', type: 'mcp_tool_call', server: 'ktx', tool: 'search', arguments: { query: 'revenue' }, status: 'failed', error: { message: 'denied' } },
|
||||
},
|
||||
{ type: 'turn.completed' },
|
||||
]);
|
||||
|
||||
expect(summary.toolCallCount).toBe(1);
|
||||
expect(summary.toolFailures).toEqual(['search: denied']);
|
||||
});
|
||||
|
||||
it('throws a clear error for malformed JSONL lines', () => {
|
||||
expect(() => parseCodexExecEventLine('{not-json')).toThrow('Codex JSONL event stream was malformed');
|
||||
});
|
||||
|
|
|
|||
|
|
@ -169,6 +169,44 @@ describe('CodexKtxLlmRuntime', () => {
|
|||
expect(result.error?.message).toBe('boom');
|
||||
});
|
||||
|
||||
it('surfaces failed MCP tool calls as agent-loop errors', async () => {
|
||||
const runtime = new CodexKtxLlmRuntime({
|
||||
projectDir: '/tmp/project',
|
||||
modelSlots: { default: 'codex' },
|
||||
runner: runner([
|
||||
{ type: 'turn.started' },
|
||||
{ type: 'item.started', item: { type: 'mcp_tool_call', server: 'ktx', tool: 'search', status: 'in_progress' } },
|
||||
{
|
||||
type: 'item.completed',
|
||||
item: {
|
||||
type: 'mcp_tool_call',
|
||||
server: 'ktx',
|
||||
tool: 'search',
|
||||
status: 'failed',
|
||||
error: { message: 'denied' },
|
||||
},
|
||||
},
|
||||
{ type: 'turn.completed', usage: { input_tokens: 1, output_tokens: 1 } },
|
||||
]),
|
||||
});
|
||||
|
||||
const result = await runtime.runAgentLoop({
|
||||
modelRole: 'default',
|
||||
systemPrompt: 'system',
|
||||
userPrompt: 'user',
|
||||
stepBudget: 5,
|
||||
telemetryTags: {},
|
||||
toolSet: {},
|
||||
});
|
||||
|
||||
expect(result.stopReason).toBe('error');
|
||||
expect(result.error?.message).toBe('Codex runtime tool call failed: search: denied');
|
||||
expect(result.metrics).toMatchObject({
|
||||
stepCount: 1,
|
||||
usage: { inputTokens: 1, outputTokens: 1, totalTokens: 2 },
|
||||
});
|
||||
});
|
||||
|
||||
it('probes Codex authentication through a minimal non-interactive turn', async () => {
|
||||
const fakeRunner = runner([
|
||||
{ type: 'turn.started' },
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue