diff --git a/packages/cli/src/context/llm/codex-exec-events.ts b/packages/cli/src/context/llm/codex-exec-events.ts index 11982279..7c97a10e 100644 --- a/packages/cli/src/context/llm/codex-exec-events.ts +++ b/packages/cli/src/context/llm/codex-exec-events.ts @@ -81,7 +81,8 @@ export function summarizeCodexExecEvents( let finalText = ''; let stopReason: RunLoopStopReason = 'natural'; let usage: LlmTokenUsage = {}; - let stepCount = 0; + let turnCount = 0; + let completedToolStepCount = 0; const stepBoundariesMs: number[] = []; let toolCallCount = 0; const toolFailures: string[] = []; @@ -95,13 +96,38 @@ export function summarizeCodexExecEvents( } if (eventType === 'turn.started') { - stepCount += 1; + turnCount += 1; + continue; + } + + const item = record(eventRecord.item); + const itemType = text(item?.type); + + if (eventType === 'item.started' && itemType === 'mcp_tool_call') { + toolCallCount += 1; + continue; + } + + if (eventType === 'item.completed' && itemType === 'mcp_tool_call') { + completedToolStepCount += 1; + stepBoundariesMs.push(now() - startedAt); + if (item?.error !== undefined || item?.status === 'failed') { + const name = text(item.name) ?? text(item.tool) ?? text(item.tool_name) ?? 'unknown'; + toolFailures.push(`${name}: ${errorMessageFrom(item.error)}`); + } + continue; + } + + if (eventType === 'item.completed' && itemType === 'agent_message') { + finalText = text(item?.text) ?? finalText; continue; } if (eventType === 'turn.completed') { usage = usageFrom(eventRecord.usage); - stepBoundariesMs.push(now() - startedAt); + if (completedToolStepCount === 0) { + stepBoundariesMs.push(now() - startedAt); + } stopReason = stopReasonFrom(eventRecord.reason ?? eventRecord.stop_reason ?? eventRecord.terminal_reason); continue; } @@ -109,28 +135,6 @@ export function summarizeCodexExecEvents( if (eventType === 'turn.failed' || eventType === 'error') { stopReason = 'error'; error = new Error(errorMessageFrom(eventRecord.error ?? eventRecord.message)); - continue; - } - - const item = record(eventRecord.item); - const itemType = text(item?.type); - if (!item || !itemType) { - continue; - } - - if (eventType === 'item.completed' && itemType === 'agent_message') { - finalText = text(item.text) ?? finalText; - continue; - } - - if (eventType === 'item.started' && itemType === 'mcp_tool_call') { - toolCallCount += 1; - continue; - } - - if (eventType === 'item.completed' && itemType === 'mcp_tool_call' && item.error !== undefined) { - const name = text(item.name) ?? text(item.tool) ?? text(item.tool_name) ?? 'unknown'; - toolFailures.push(`${name}: ${errorMessageFrom(item.error)}`); } } @@ -138,7 +142,7 @@ export function summarizeCodexExecEvents( finalText, stopReason, usage, - stepCount, + stepCount: completedToolStepCount > 0 ? completedToolStepCount : turnCount, stepBoundariesMs, toolCallCount, toolFailures, diff --git a/packages/cli/src/context/llm/codex-runtime.ts b/packages/cli/src/context/llm/codex-runtime.ts index bea17ee0..08ae4261 100644 --- a/packages/cli/src/context/llm/codex-runtime.ts +++ b/packages/cli/src/context/llm/codex-runtime.ts @@ -46,9 +46,20 @@ function metrics(summary: CodexExecEventSummary, startedAt: number): { totalMs: return { totalMs: Date.now() - startedAt, usage: summary.usage }; } -function assertSuccessfulText(summary: CodexExecEventSummary): string { +function summaryError(summary: CodexExecEventSummary): Error | undefined { if (summary.error) { - throw summary.error; + return summary.error; + } + if (summary.toolFailures.length > 0) { + return new Error(`Codex runtime tool call failed: ${summary.toolFailures.join('; ')}`); + } + return undefined; +} + +function assertSuccessfulText(summary: CodexExecEventSummary): string { + const error = summaryError(summary); + if (error) { + throw error; } if (!summary.finalText.trim()) { throw new Error('Codex completed without an agent message'); @@ -215,9 +226,11 @@ export class CodexKtxLlmRuntime implements KtxLlmRuntimePort { ); } } + const error = summaryError(summary); + const stopReason = error ? 'error' : summary.stopReason; return { - stopReason: summary.stopReason, - ...(summary.stopReason === 'error' && summary.error ? { error: summary.error } : {}), + stopReason, + ...(stopReason === 'error' && error ? { error } : {}), metrics: { totalMs: Date.now() - startedAt, usage: summary.usage, diff --git a/packages/cli/test/context/llm/codex-exec-events.test.ts b/packages/cli/test/context/llm/codex-exec-events.test.ts index 03bbf7b1..60e98366 100644 --- a/packages/cli/test/context/llm/codex-exec-events.test.ts +++ b/packages/cli/test/context/llm/codex-exec-events.test.ts @@ -5,7 +5,7 @@ import { } from '../../../src/context/llm/codex-exec-events.js'; describe('Codex exec event parsing', () => { - it('captures final agent text, SDK usage, steps, and natural completion', () => { + it('uses the completed turn as one step when no MCP tools run', () => { const summary = summarizeCodexExecEvents( [ { type: 'thread.started', thread_id: 'thr_1' }, @@ -35,6 +35,52 @@ describe('Codex exec event parsing', () => { }); }); + it('uses completed MCP tool calls as loop steps', () => { + const offsets = [115, 140, 175]; + const summary = summarizeCodexExecEvents( + [ + { type: 'turn.started' }, + { + type: 'item.started', + item: { id: 'call_1', type: 'mcp_tool_call', server: 'ktx', tool: 'search', arguments: {}, status: 'in_progress' }, + }, + { + type: 'item.completed', + item: { id: 'call_1', type: 'mcp_tool_call', server: 'ktx', tool: 'search', arguments: {}, status: 'completed' }, + }, + { + type: 'item.started', + item: { id: 'call_2', type: 'mcp_tool_call', server: 'ktx', tool: 'lookup', arguments: {}, status: 'in_progress' }, + }, + { + type: 'item.completed', + item: { + id: 'call_2', + type: 'mcp_tool_call', + server: 'ktx', + tool: 'lookup', + arguments: {}, + status: 'failed', + error: { message: 'denied' }, + }, + }, + { type: 'item.completed', item: { id: 'item_1', type: 'agent_message', text: 'done' } }, + { type: 'turn.completed', usage: { input_tokens: 1, output_tokens: 1, cached_input_tokens: 0, reasoning_output_tokens: 0 } }, + ], + { startedAt: 100, now: () => offsets.shift() ?? 175 }, + ); + + expect(summary).toEqual({ + finalText: 'done', + stopReason: 'natural', + usage: { inputTokens: 1, outputTokens: 1, totalTokens: 2 }, + stepCount: 2, + stepBoundariesMs: [15, 40], + toolCallCount: 2, + toolFailures: ['lookup: denied'], + }); + }); + it('maps turn failures into error stop reason', () => { const summary = summarizeCodexExecEvents([ { type: 'turn.started' }, @@ -54,24 +100,6 @@ describe('Codex exec event parsing', () => { expect(summary.stopReason).toBe('budget'); }); - it('counts SDK-shaped MCP tool calls and failed MCP tool calls', () => { - const summary = summarizeCodexExecEvents([ - { type: 'turn.started' }, - { - type: 'item.started', - item: { id: 'call_1', type: 'mcp_tool_call', server: 'ktx', tool: 'search', arguments: { query: 'revenue' }, status: 'in_progress' }, - }, - { - type: 'item.completed', - item: { id: 'call_1', type: 'mcp_tool_call', server: 'ktx', tool: 'search', arguments: { query: 'revenue' }, status: 'failed', error: { message: 'denied' } }, - }, - { type: 'turn.completed' }, - ]); - - expect(summary.toolCallCount).toBe(1); - expect(summary.toolFailures).toEqual(['search: denied']); - }); - it('throws a clear error for malformed JSONL lines', () => { expect(() => parseCodexExecEventLine('{not-json')).toThrow('Codex JSONL event stream was malformed'); }); diff --git a/packages/cli/test/context/llm/codex-runtime.test.ts b/packages/cli/test/context/llm/codex-runtime.test.ts index 3218ca5f..9e98ab1a 100644 --- a/packages/cli/test/context/llm/codex-runtime.test.ts +++ b/packages/cli/test/context/llm/codex-runtime.test.ts @@ -169,6 +169,44 @@ describe('CodexKtxLlmRuntime', () => { expect(result.error?.message).toBe('boom'); }); + it('surfaces failed MCP tool calls as agent-loop errors', async () => { + const runtime = new CodexKtxLlmRuntime({ + projectDir: '/tmp/project', + modelSlots: { default: 'codex' }, + runner: runner([ + { type: 'turn.started' }, + { type: 'item.started', item: { type: 'mcp_tool_call', server: 'ktx', tool: 'search', status: 'in_progress' } }, + { + type: 'item.completed', + item: { + type: 'mcp_tool_call', + server: 'ktx', + tool: 'search', + status: 'failed', + error: { message: 'denied' }, + }, + }, + { type: 'turn.completed', usage: { input_tokens: 1, output_tokens: 1 } }, + ]), + }); + + const result = await runtime.runAgentLoop({ + modelRole: 'default', + systemPrompt: 'system', + userPrompt: 'user', + stepBudget: 5, + telemetryTags: {}, + toolSet: {}, + }); + + expect(result.stopReason).toBe('error'); + expect(result.error?.message).toBe('Codex runtime tool call failed: search: denied'); + expect(result.metrics).toMatchObject({ + stepCount: 1, + usage: { inputTokens: 1, outputTokens: 1, totalTokens: 2 }, + }); + }); + it('probes Codex authentication through a minimal non-interactive turn', async () => { const fakeRunner = runner([ { type: 'turn.started' },