fix: drive codex loop metrics from mcp events

This commit is contained in:
Andrey Avtomonov 2026-06-01 18:06:37 +02:00
parent 1430ca49eb
commit f27fc9c9a5
4 changed files with 132 additions and 49 deletions

View file

@ -81,7 +81,8 @@ export function summarizeCodexExecEvents(
let finalText = '';
let stopReason: RunLoopStopReason = 'natural';
let usage: LlmTokenUsage = {};
let stepCount = 0;
let turnCount = 0;
let completedToolStepCount = 0;
const stepBoundariesMs: number[] = [];
let toolCallCount = 0;
const toolFailures: string[] = [];
@ -95,13 +96,38 @@ export function summarizeCodexExecEvents(
}
if (eventType === 'turn.started') {
stepCount += 1;
turnCount += 1;
continue;
}
const item = record(eventRecord.item);
const itemType = text(item?.type);
if (eventType === 'item.started' && itemType === 'mcp_tool_call') {
toolCallCount += 1;
continue;
}
if (eventType === 'item.completed' && itemType === 'mcp_tool_call') {
completedToolStepCount += 1;
stepBoundariesMs.push(now() - startedAt);
if (item?.error !== undefined || item?.status === 'failed') {
const name = text(item.name) ?? text(item.tool) ?? text(item.tool_name) ?? 'unknown';
toolFailures.push(`${name}: ${errorMessageFrom(item.error)}`);
}
continue;
}
if (eventType === 'item.completed' && itemType === 'agent_message') {
finalText = text(item?.text) ?? finalText;
continue;
}
if (eventType === 'turn.completed') {
usage = usageFrom(eventRecord.usage);
stepBoundariesMs.push(now() - startedAt);
if (completedToolStepCount === 0) {
stepBoundariesMs.push(now() - startedAt);
}
stopReason = stopReasonFrom(eventRecord.reason ?? eventRecord.stop_reason ?? eventRecord.terminal_reason);
continue;
}
@ -109,28 +135,6 @@ export function summarizeCodexExecEvents(
if (eventType === 'turn.failed' || eventType === 'error') {
stopReason = 'error';
error = new Error(errorMessageFrom(eventRecord.error ?? eventRecord.message));
continue;
}
const item = record(eventRecord.item);
const itemType = text(item?.type);
if (!item || !itemType) {
continue;
}
if (eventType === 'item.completed' && itemType === 'agent_message') {
finalText = text(item.text) ?? finalText;
continue;
}
if (eventType === 'item.started' && itemType === 'mcp_tool_call') {
toolCallCount += 1;
continue;
}
if (eventType === 'item.completed' && itemType === 'mcp_tool_call' && item.error !== undefined) {
const name = text(item.name) ?? text(item.tool) ?? text(item.tool_name) ?? 'unknown';
toolFailures.push(`${name}: ${errorMessageFrom(item.error)}`);
}
}
@ -138,7 +142,7 @@ export function summarizeCodexExecEvents(
finalText,
stopReason,
usage,
stepCount,
stepCount: completedToolStepCount > 0 ? completedToolStepCount : turnCount,
stepBoundariesMs,
toolCallCount,
toolFailures,

View file

@ -46,9 +46,20 @@ function metrics(summary: CodexExecEventSummary, startedAt: number): { totalMs:
return { totalMs: Date.now() - startedAt, usage: summary.usage };
}
function assertSuccessfulText(summary: CodexExecEventSummary): string {
/**
 * Derives the error, if any, represented by a Codex exec event summary.
 *
 * This helper never throws: it hands the error back so that each caller can
 * decide whether to throw it or to report it as part of a result.
 *
 * @param summary - Summary whose `error` and `toolFailures` fields are inspected.
 * @returns `summary.error` when it is set; otherwise a new `Error` listing every
 *   recorded tool failure (joined with `'; '`) when there is at least one;
 *   otherwise `undefined`.
 */
function summaryError(summary: CodexExecEventSummary): Error | undefined {
  // An explicit error on the summary takes precedence over tool failures.
  if (summary.error) {
    return summary.error;
  }
  if (summary.toolFailures.length > 0) {
    return new Error(`Codex runtime tool call failed: ${summary.toolFailures.join('; ')}`);
  }
  return undefined;
}
function assertSuccessfulText(summary: CodexExecEventSummary): string {
const error = summaryError(summary);
if (error) {
throw error;
}
if (!summary.finalText.trim()) {
throw new Error('Codex completed without an agent message');
@ -215,9 +226,11 @@ export class CodexKtxLlmRuntime implements KtxLlmRuntimePort {
);
}
}
const error = summaryError(summary);
const stopReason = error ? 'error' : summary.stopReason;
return {
stopReason: summary.stopReason,
...(summary.stopReason === 'error' && summary.error ? { error: summary.error } : {}),
stopReason,
...(stopReason === 'error' && error ? { error } : {}),
metrics: {
totalMs: Date.now() - startedAt,
usage: summary.usage,