feat: add codex llm backend

This commit is contained in:
Andrey Avtomonov 2026-06-01 17:22:24 +02:00
parent 21744fc520
commit 64b8a416fe
28 changed files with 1462 additions and 14 deletions

View file

@ -77,9 +77,10 @@ describe('createLocalBundleIngestRuntime', () => {
}),
).toThrow(
[
'ktx ingest requires llm.provider.backend: anthropic, vertex, gateway, or claude-code, or an injected agentRunner.',
'ktx ingest requires llm.provider.backend: anthropic, vertex, gateway, claude-code, or codex, or an injected agentRunner.',
'Configure a local Claude Code session or API-backed LLM, then rerun ingest:',
` ktx setup --project-dir ${project.projectDir} --llm-backend claude-code --no-input`,
` ktx setup --project-dir ${project.projectDir} --llm-backend codex --llm-model gpt-5.3-codex --no-input`,
` ktx setup --project-dir ${project.projectDir} --llm-backend anthropic --anthropic-api-key-env ANTHROPIC_API_KEY --llm-model claude-sonnet-4-6 --no-input`,
].join('\n'),
);

View file

@ -0,0 +1,64 @@
import { describe, expect, it } from 'vitest';
import {
parseCodexExecEventLine,
summarizeCodexExecEvents,
} from '../../../src/context/llm/codex-exec-events.js';
// Exercises summarizeCodexExecEvents and parseCodexExecEventLine against hand-built Codex exec event sequences.
describe('Codex exec event parsing', () => {
// Happy path: one agent_message item followed by turn.completed.
it('captures final agent text, usage, steps, and natural completion', () => {
const summary = summarizeCodexExecEvents(
[
{ type: 'thread.started', thread: { id: 'thr_1' } },
{ type: 'turn.started' },
{ type: 'item.completed', item: { id: 'item_1', type: 'agent_message', text: 'hello from codex' } },
{ type: 'turn.completed', usage: { input_tokens: 12, output_tokens: 5, total_tokens: 17 } },
],
// Fixed clock: startedAt is 100 and now() always returns 125; the expected step boundary below is 25
// (presumably now() - startedAt — confirm against the implementation).
{ startedAt: 100, now: () => 125 },
);
// The snake_case usage fields from the event are expected back as camelCase.
expect(summary).toEqual({
finalText: 'hello from codex',
stopReason: 'natural',
usage: { inputTokens: 12, outputTokens: 5, totalTokens: 17 },
stepCount: 1,
stepBoundariesMs: [25],
toolCallCount: 0,
toolFailures: [],
});
});
// A turn.failed event is expected to surface as stopReason 'error' carrying the event's message.
it('maps turn failures into error stop reason', () => {
const summary = summarizeCodexExecEvents([
{ type: 'turn.started' },
{ type: 'turn.failed', error: { message: 'Codex could not connect to required MCP server' } },
]);
expect(summary.stopReason).toBe('error');
expect(summary.error?.message).toContain('Codex could not connect to required MCP server');
});
// turn.completed with reason 'max_turns' is expected to be reported as 'budget' rather than 'natural'.
it('maps max-turns terminal reasons into budget stop reason', () => {
const summary = summarizeCodexExecEvents([
{ type: 'turn.started' },
{ type: 'turn.completed', reason: 'max_turns', usage: { input_tokens: 1, output_tokens: 1 } },
]);
expect(summary.stopReason).toBe('budget');
});
// The started/completed pair shares id 'call_1' and is expected to count as a single tool call;
// the error on the completed item is expected as a '<name>: <error>' failure entry.
it('counts MCP tool calls and failed MCP tool calls', () => {
const summary = summarizeCodexExecEvents([
{ type: 'turn.started' },
{ type: 'item.started', item: { id: 'call_1', type: 'mcp_tool_call', name: 'search' } },
{ type: 'item.completed', item: { id: 'call_1', type: 'mcp_tool_call', name: 'search', error: 'denied' } },
{ type: 'turn.completed' },
]);
expect(summary.toolCallCount).toBe(1);
expect(summary.toolFailures).toEqual(['search: denied']);
});
// A line that is not valid JSON must produce the dedicated malformed-stream error message.
it('throws a clear error for malformed JSONL lines', () => {
expect(() => parseCodexExecEventLine('{not-json')).toThrow('Codex JSONL event stream was malformed');
});
});

View file

@ -0,0 +1,73 @@
import { describe, expect, it, vi } from 'vitest';
import { z } from 'zod';
import {
createCodexRuntimeMcpServer,
startCodexRuntimeMcpServer,
} from '../../../src/context/llm/codex-mcp-runtime-server.js';
// Covers createCodexRuntimeMcpServer (tool registration) and startCodexRuntimeMcpServer (HTTP startup) with injected fakes.
describe('Codex runtime MCP server', () => {
it('registers runtime tools with markdown output', async () => {
// Captures every registerTool call made on the fake server, keyed by tool name.
const registered = new Map<
string,
{
config: { description?: string; inputSchema: unknown };
handler: (input: Record<string, unknown>) => Promise<unknown>;
}
>();
const server = createCodexRuntimeMcpServer({
server: {
registerTool(name, config, handler) {
registered.set(name, { config, handler });
},
},
toolSet: {
wiki_search: {
name: 'wiki_search',
description: 'Search the wiki',
inputSchema: z.object({ query: z.string() }),
execute: vi.fn(async () => ({ markdown: 'result markdown', structured: { matches: 1 } })),
},
},
});
expect(server).toBeDefined();
expect([...registered.keys()]).toEqual(['wiki_search']);
expect(registered.get('wiki_search')?.config).toMatchObject({
description: 'Search the wiki',
});
// The registered handler is expected to return the tool's markdown as a text content item
// and its structured result as structuredContent.
await expect(registered.get('wiki_search')?.handler({ query: 'revenue' })).resolves.toEqual({
content: [{ type: 'text', text: 'result markdown' }],
structuredContent: { matches: 1 },
});
});
it('starts loopback HTTP MCP with a bearer token and reports the runtime URL', async () => {
const close = vi.fn(async () => undefined);
// Stub server runner: reports port 4321 as the bound address and exposes the close spy.
const runServer = vi.fn(async () => ({
server: { address: () => ({ port: 4321 }) },
close,
}));
const handle = await startCodexRuntimeMcpServer({
projectDir: '/tmp/ktx-project',
toolSet: {},
runServer: runServer as never,
});
// The reported URL is expected to combine the loopback host, the port reported by the stub, and the /mcp path.
expect(handle.url).toBe('http://127.0.0.1:4321/mcp');
expect(handle.bearerTokenEnvVar).toBe('KTX_CODEX_RUNTIME_MCP_TOKEN');
// Token format: exactly 64 lowercase hexadecimal characters.
expect(handle.bearerToken).toMatch(/^[a-f0-9]{64}$/);
// runServer must be asked for port 0 on 127.0.0.1 (presumably so the OS assigns a free port — confirm),
// with the same token the handle exposes and no allowed origins.
expect(runServer).toHaveBeenCalledWith(
expect.objectContaining({
projectDir: '/tmp/ktx-project',
host: '127.0.0.1',
port: 0,
token: handle.bearerToken,
allowedHosts: ['127.0.0.1', 'localhost'],
allowedOrigins: [],
}),
);
// Closing the handle must be forwarded to the close function returned by runServer.
await handle.close();
expect(close).toHaveBeenCalled();
});
});

View file

@ -0,0 +1,17 @@
import { describe, expect, it } from 'vitest';
import { resolveCodexModel } from '../../../src/context/llm/codex-models.js';
// Verifies resolveCodexModel: alias and pass-through resolution, and rejection of blank or non-Codex model names.
describe('resolveCodexModel', () => {
  // 'codex' and 'default' are expected to resolve to 'gpt-5.3-codex'; explicit gpt-* ids are expected back unchanged.
  it.each([
    ['codex', 'gpt-5.3-codex'],
    ['default', 'gpt-5.3-codex'],
    ['gpt-5.3-codex', 'gpt-5.3-codex'],
    ['gpt-5.4', 'gpt-5.4'],
  ])('maps %s to %s', (input, expected) => {
    expect(resolveCodexModel(input)).toBe(expected);
  });
  // %j (JSON) formatting keeps the empty and whitespace-only inputs distinguishable in the generated
  // test titles; with %s both would render as an apparently blank name.
  it.each(['', ' ', 'sonnet', 'claude-sonnet-4-6'])('rejects %j', (input) => {
    expect(() => resolveCodexModel(input)).toThrow('Unsupported Codex model');
  });
});

View file

@ -0,0 +1,50 @@
import { describe, expect, it } from 'vitest';
import { buildCodexRuntimeConfig } from '../../../src/context/llm/codex-runtime-config.js';
// Pins the exact config overrides and environment produced by buildCodexRuntimeConfig, with and without an MCP server.
describe('buildCodexRuntimeConfig', () => {
// Without an mcp option the result must contain only the model plus the restrictive defaults, and an empty env.
it('builds deny-by-default config without MCP tools', () => {
expect(buildCodexRuntimeConfig({ model: 'gpt-5.3-codex' })).toEqual({
configOverrides: {
model: 'gpt-5.3-codex',
approval_policy: 'never',
sandbox_mode: 'read-only',
web_search: 'disabled',
history: { persistence: 'none' },
},
env: {},
});
});
// With an mcp option a single 'ktx' server entry is expected. The token value itself must appear only in env,
// while the config overrides carry just the name of the environment variable.
it('adds only the temporary ktx MCP server and exact enabled tools', () => {
expect(
buildCodexRuntimeConfig({
model: 'gpt-5.3-codex',
mcp: {
url: 'http://127.0.0.1:4567/mcp',
bearerTokenEnvVar: 'KTX_CODEX_RUNTIME_MCP_TOKEN',
bearerToken: 'secret-token',
toolNames: ['sl_read_source', 'wiki_search'],
},
}),
).toEqual({
configOverrides: {
model: 'gpt-5.3-codex',
approval_policy: 'never',
sandbox_mode: 'read-only',
web_search: 'disabled',
history: { persistence: 'none' },
mcp_servers: {
ktx: {
url: 'http://127.0.0.1:4567/mcp',
bearer_token_env_var: 'KTX_CODEX_RUNTIME_MCP_TOKEN',
enabled_tools: ['sl_read_source', 'wiki_search'],
required: true,
},
},
},
env: {
KTX_CODEX_RUNTIME_MCP_TOKEN: 'secret-token',
},
});
});
});

View file

@ -0,0 +1,187 @@
import { describe, expect, it, vi } from 'vitest';
import { z } from 'zod';
import {
CodexKtxLlmRuntime,
runCodexAuthProbe,
} from '../../../src/context/llm/codex-runtime.js';
/**
 * Replays a fixed list of fake Codex events as an async stream, preserving order.
 */
async function* events(items: unknown[]) {
  for (let index = 0; index < items.length; index += 1) {
    yield items[index];
  }
}
/**
 * Builds a fake runner whose mocked `runStreamed` resolves to an async stream of the given events.
 */
function runner(items: unknown[]) {
  const runStreamed = vi.fn(async () => events(items));
  return { runStreamed };
}
// Covers CodexKtxLlmRuntime text generation, structured output and agent loops, plus runCodexAuthProbe,
// all driven by a fake runner that streams canned events.
describe('CodexKtxLlmRuntime', () => {
it('generates text with the role-selected model and metrics', async () => {
const onMetrics = vi.fn();
const fakeRunner = runner([
{ type: 'turn.started' },
{ type: 'item.completed', item: { type: 'agent_message', text: 'hello' } },
{ type: 'turn.completed', usage: { input_tokens: 3, output_tokens: 4, total_tokens: 7 } },
]);
const runtime = new CodexKtxLlmRuntime({
projectDir: '/tmp/project',
// The 'triage' slot differs from the default so the role-to-model lookup is observable below.
modelSlots: { default: 'codex', triage: 'gpt-5.4' },
runner: fakeRunner,
});
await expect(runtime.generateText({ role: 'triage', system: 'system', prompt: 'prompt', onMetrics })).resolves.toBe('hello');
// The runner is expected to receive the triage model and the system and user prompts joined by a blank line.
expect(fakeRunner.runStreamed).toHaveBeenCalledWith(
expect.objectContaining({
projectDir: '/tmp/project',
model: 'gpt-5.4',
prompt: 'system\n\nprompt',
}),
);
expect(onMetrics).toHaveBeenCalledWith(expect.objectContaining({ usage: { inputTokens: 3, outputTokens: 4, totalTokens: 7 } }));
});
// The final agent message is a JSON string; generateObject is expected to resolve to the parsed object.
it('generates and validates structured output', async () => {
const fakeRunner = runner([
{ type: 'turn.started' },
{ type: 'item.completed', item: { type: 'agent_message', text: '{"answer":"yes"}' } },
{ type: 'turn.completed' },
]);
const runtime = new CodexKtxLlmRuntime({
projectDir: '/tmp/project',
modelSlots: { default: 'codex' },
runner: fakeRunner,
});
await expect(
runtime.generateObject({
role: 'default',
prompt: 'json',
schema: z.object({ answer: z.string() }),
}),
).resolves.toEqual({ answer: 'yes' });
// The runner must be given an outputSchema whose top-level type is 'object'.
expect(fakeRunner.runStreamed).toHaveBeenCalledWith(
expect.objectContaining({
outputSchema: expect.objectContaining({ type: 'object' }),
}),
);
});
// Final text that is not JSON must reject with the structured-output validation error.
it('returns a structured-output error when Codex final text is invalid JSON', async () => {
const fakeRunner = runner([
{ type: 'turn.started' },
{ type: 'item.completed', item: { type: 'agent_message', text: 'not json' } },
{ type: 'turn.completed' },
]);
const runtime = new CodexKtxLlmRuntime({
projectDir: '/tmp/project',
modelSlots: { default: 'codex' },
runner: fakeRunner,
});
await expect(
runtime.generateObject({
role: 'default',
prompt: 'json',
schema: z.object({ answer: z.string() }),
}),
).rejects.toThrow('Codex structured output failed validation');
});
it('starts and closes a temporary MCP server for tool-backed agent loops', async () => {
const close = vi.fn(async () => undefined);
// Injected MCP starter: returns a fixed URL and token so the values forwarded to the runner can be asserted.
const startMcpServer = vi.fn(async () => ({
url: 'http://127.0.0.1:4321/mcp',
bearerTokenEnvVar: 'KTX_CODEX_RUNTIME_MCP_TOKEN' as const,
bearerToken: 'token',
close,
}));
const fakeRunner = runner([
{ type: 'turn.started' },
{ type: 'item.started', item: { type: 'mcp_tool_call', name: 'wiki_search' } },
{ type: 'item.completed', item: { type: 'agent_message', text: 'done' } },
{ type: 'turn.completed', usage: { input_tokens: 1, output_tokens: 1, total_tokens: 2 } },
]);
const runtime = new CodexKtxLlmRuntime({
projectDir: '/tmp/project',
modelSlots: { default: 'codex' },
runner: fakeRunner,
startMcpServer,
});
const onStepFinish = vi.fn();
const result = await runtime.runAgentLoop({
modelRole: 'default',
systemPrompt: 'system',
userPrompt: 'user',
stepBudget: 5,
telemetryTags: {},
onStepFinish,
toolSet: {
wiki_search: {
name: 'wiki_search',
description: 'Search wiki',
inputSchema: z.object({ query: z.string() }),
execute: vi.fn(),
},
},
});
expect(result.stopReason).toBe('natural');
expect(result.metrics).toMatchObject({ stepCount: 1, usage: { inputTokens: 1, outputTokens: 1, totalTokens: 2 } });
// The step callback is expected to report stepIndex 1 together with the configured budget of 5.
expect(onStepFinish).toHaveBeenCalledWith({ stepIndex: 1, stepBudget: 5 });
expect(startMcpServer).toHaveBeenCalledWith({ projectDir: '/tmp/project', toolSet: expect.any(Object) });
// The runner must receive the token through env and a 'ktx' MCP server entry limited to the tool set's names.
expect(fakeRunner.runStreamed).toHaveBeenCalledWith(
expect.objectContaining({
env: { KTX_CODEX_RUNTIME_MCP_TOKEN: 'token' },
configOverrides: expect.objectContaining({
mcp_servers: expect.objectContaining({
ktx: expect.objectContaining({
url: 'http://127.0.0.1:4321/mcp',
enabled_tools: ['wiki_search'],
required: true,
}),
}),
}),
}),
);
// The temporary MCP server must be closed once the loop has finished.
expect(close).toHaveBeenCalled();
});
// A lone turn.failed event: runAgentLoop is expected to resolve (not reject) with stopReason 'error' and the event's message.
it('returns error stop reason on turn failure', async () => {
const runtime = new CodexKtxLlmRuntime({
projectDir: '/tmp/project',
modelSlots: { default: 'codex' },
runner: runner([{ type: 'turn.failed', error: { message: 'boom' } }]),
});
const result = await runtime.runAgentLoop({
modelRole: 'default',
systemPrompt: 'system',
userPrompt: 'user',
stepBudget: 5,
telemetryTags: {},
toolSet: {},
});
expect(result.stopReason).toBe('error');
expect(result.error?.message).toBe('boom');
});
// A turn that completes normally is expected to make the auth probe resolve to { ok: true }.
it('probes Codex authentication through a minimal non-interactive turn', async () => {
const fakeRunner = runner([
{ type: 'turn.started' },
{ type: 'item.completed', item: { type: 'agent_message', text: 'ok' } },
{ type: 'turn.completed' },
]);
await expect(
runCodexAuthProbe({
projectDir: '/tmp/project',
model: 'codex',
runner: fakeRunner,
}),
).resolves.toEqual({ ok: true });
});
});

View file

@ -0,0 +1,79 @@
import { describe, expect, it, vi } from 'vitest';
// Shared mock state for the '@openai/codex-sdk' module mock. It is built inside vi.hoisted so that it
// can be referenced from the vi.mock factory.
const sdkMock = vi.hoisted(() => {
// A single generator instance is created here and returned by every runStreamed call, so the
// stream can only be consumed once.
const events = (async function* () {
yield { type: 'turn.completed', usage: { input_tokens: 1, output_tokens: 2, total_tokens: 3 } };
})();
// Records the value of KTX_CODEX_RUNTIME_MCP_TOKEN as seen each time the Codex constructor runs.
const observedEnv: Array<string | undefined> = [];
const runStreamed = vi.fn(async () => ({ events }));
const startThread = vi.fn(() => ({ runStreamed }));
// Constructor stand-in: snapshots the env var, then exposes the received options and startThread on the instance.
const Codex = vi.fn(function Codex(this: { startThread: typeof startThread }, options?: unknown) {
observedEnv.push(process.env.KTX_CODEX_RUNTIME_MCP_TOKEN);
Object.assign(this, { options, startThread });
});
return { Codex, startThread, runStreamed, observedEnv };
});
vi.mock('@openai/codex-sdk', () => ({ Codex: sdkMock.Codex }));
import { CodexSdkCliRunner } from '../../../src/context/llm/codex-sdk-runner.js';
/**
 * Drains an async iterable and resolves with every yielded value, in iteration order.
 */
async function collectAsync<T>(items: AsyncIterable<T>): Promise<T[]> {
  const drained: T[] = [];
  for await (const entry of items) {
    drained[drained.length] = entry;
  }
  return drained;
}
// Checks that CodexSdkCliRunner passes per-run config to the mocked Codex SDK and streams back the thread events.
describe('CodexSdkCliRunner', () => {
it('constructs Codex with per-run config and streams thread events', async () => {
const runner = new CodexSdkCliRunner();
// Start with the token variable unset; its previous value is restored in the finally block.
const previousToken = process.env.KTX_CODEX_RUNTIME_MCP_TOKEN;
delete process.env.KTX_CODEX_RUNTIME_MCP_TOKEN;
const outputSchema = {
type: 'object',
properties: { answer: { type: 'string' } },
required: ['answer'],
additionalProperties: false,
};
try {
const events = await runner.runStreamed({
projectDir: '/tmp/ktx-project',
model: 'gpt-5.3-codex',
prompt: 'Return JSON.',
configOverrides: {
approval_policy: 'never',
sandbox_mode: 'read-only',
},
env: { KTX_CODEX_RUNTIME_MCP_TOKEN: 'token' },
outputSchema,
});
// The Codex constructor is expected to receive the config overrides with the model merged in.
expect(sdkMock.Codex).toHaveBeenCalledWith({
config: {
approval_policy: 'never',
sandbox_mode: 'read-only',
model: 'gpt-5.3-codex',
},
});
// The token must be visible in process.env while Codex is constructed and unset again once runStreamed has resolved.
expect(sdkMock.observedEnv).toEqual(['token']);
expect(process.env.KTX_CODEX_RUNTIME_MCP_TOKEN).toBeUndefined();
expect(sdkMock.startThread).toHaveBeenCalledWith({
workingDirectory: '/tmp/ktx-project',
skipGitRepoCheck: true,
});
expect(sdkMock.runStreamed).toHaveBeenCalledWith('Return JSON.', { outputSchema });
// The returned iterable must yield exactly the events produced by the mocked thread.
await expect(collectAsync(events)).resolves.toEqual([
{ type: 'turn.completed', usage: { input_tokens: 1, output_tokens: 2, total_tokens: 3 } },
]);
} finally {
// Put process.env back the way it was so this test does not leak state into others.
if (previousToken === undefined) {
delete process.env.KTX_CODEX_RUNTIME_MCP_TOKEN;
} else {
process.env.KTX_CODEX_RUNTIME_MCP_TOKEN = previousToken;
}
}
});
});

View file

@ -22,4 +22,25 @@ describe('local KTX LLM runtime config', () => {
}),
).toBeNull();
});
// For backend 'codex' the runtime is expected to come from the injected createCodexRuntime factory.
// The stub factory echoes its deps so the forwarded projectDir can be asserted.
it('creates a Codex runtime for codex backend without creating an AI SDK provider', () => {
const runtime = createLocalKtxLlmRuntimeFromConfig(
{
provider: { backend: 'codex' },
models: { default: 'codex', triage: 'gpt-5.4' },
},
{ env: {}, projectDir: '/tmp/project', createCodexRuntime: vi.fn((deps) => ({ deps }) as never) },
);
expect(runtime).toMatchObject({ deps: expect.objectContaining({ projectDir: '/tmp/project' }) });
});
// createLocalKtxLlmProviderFromConfig is expected to return null when the configured backend is 'codex'.
it('returns null from the AI SDK provider factory for codex backend', () => {
expect(
createLocalKtxLlmProviderFromConfig({
provider: { backend: 'codex' },
models: { default: 'codex' },
}),
).toBeNull();
});
});

View file

@ -231,6 +231,31 @@ llm:
});
});
// Parses a project config that selects backend 'codex' and sets all six model slots to 'gpt-5.3-codex',
// then checks that the backend and every slot come back unchanged.
it('parses Codex as a first-class LLM backend', () => {
const config = parseKtxProjectConfig(`
llm:
provider:
backend: codex
models:
default: gpt-5.3-codex
triage: gpt-5.3-codex
candidateExtraction: gpt-5.3-codex
curator: gpt-5.3-codex
reconcile: gpt-5.3-codex
repair: gpt-5.3-codex
`);
expect(config.llm.provider.backend).toBe('codex');
expect(config.llm.models).toEqual({
default: 'gpt-5.3-codex',
triage: 'gpt-5.3-codex',
candidateExtraction: 'gpt-5.3-codex',
curator: 'gpt-5.3-codex',
reconcile: 'gpt-5.3-codex',
repair: 'gpt-5.3-codex',
});
});
it('parses gateway LLM, OpenAI scan embeddings, and sentence-transformers ingest embeddings', () => {
const config = parseKtxProjectConfig(`
llm:
@ -530,7 +555,7 @@ describe('generateKtxProjectConfigJsonSchema', () => {
const llm = (schema.properties as Record<string, { properties?: Record<string, unknown> }>).llm;
const provider = llm?.properties?.provider as { properties?: Record<string, unknown> };
const backend = provider?.properties?.backend as { enum?: readonly string[] };
expect(backend?.enum).toEqual(['none', 'anthropic', 'vertex', 'gateway', 'claude-code']);
expect(backend?.enum).toEqual(['none', 'anthropic', 'vertex', 'gateway', 'claude-code', 'codex']);
const storage = (schema.properties as Record<string, { properties?: Record<string, unknown> }>).storage;
const state = storage?.properties?.state as { enum?: readonly string[] };