mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-19 08:28:06 +02:00
feat: add codex llm backend for ktx runtime work (#253)
* feat: add codex sdk runner foundation * feat: parse codex runtime events * feat: expose codex runtime mcp tools * feat: add codex llm runtime * feat: wire codex llm backend * test: avoid Array.fromAsync in codex runner test * docs: document codex llm backend * fix: tighten codex runtime config ownership * fix: use codex sdk env and thread options * fix: parse codex sdk event shapes * test: add codex backend live smoke * docs: clarify codex backend isolation * fix: drive codex loop metrics from mcp events * fix: enforce codex local step budget * docs: disclose codex isolation limits * fix: count all codex agent steps and stream step callbacks live The agent-loop step budget only counted completed mcp_tool_call items, so built-in command_execution steps (which the public Codex SDK/CLI surface can still expose) never decremented the budget, letting ingest/reconciliation run past stepBudget until Codex stopped on its own. onStepFinish was also replayed only after the whole stream drained, so live work_unit_step / reconciliation progress appeared stuck until the Codex process exited. collectEvents is now the single live step accumulator: it counts every completed agent-action item via a shared isCompletedAgentStep predicate (command_execution, mcp_tool_call, file_change, web_search), fires onStepFinish as each step completes, and enforces the budget on that broader count. A no-tool turn still counts as one step. toolFailures stays MCP-specific, since a non-zero command exit is normal agent exploration, not a loop failure. * test: align ingest llm-guard assertions with codex backend The skip-llm ingest guard message now lists codex as a valid backend and mentions a Claude Code/Codex session plus a codex setup hint, but this slow suite test still asserted the pre-codex wording. Update it to match the production message (already covered by the local-bundle-runtime unit test) and add the codex setup-line assertion. 
* fix: treat codex error:null tool calls as success The Codex SDK serializes error: null on successful mcp_tool_call items, so the failure check (item.error !== undefined) flagged every successful tool call as failed with the empty-payload default "Codex turn failed". This killed every ingest work unit under the codex backend before it could produce a patch. Key on status === 'failed' (authoritative, always set) and only treat a populated error object as a failure. Add a regression test built from a verbatim real-SDK event capture. * fix: default codex backend to gpt-5.5 and report real probe errors The previous default gpt-5.3-codex is an API-key-only model that the OpenAI API rejects under ChatGPT-account (subscription) auth, so codex status/setup failed with a misleading "authentication is not usable" message even though auth was fine. - Default codex model is now gpt-5.5 (works on both subscription and API-key auth); the curated setup picker offers gpt-5.5 / gpt-5.4 / gpt-5.4-mini and keeps free-form entry for account-specific ids (e.g. gpt-5.3-codex-spark). - runCodexAuthProbe now distinguishes "model not available" from an auth failure and surfaces the real API error: collectEvents retains stream events when the SDK throws on a non-zero exit, and the API error JSON envelope is unwrapped to its human-readable message. - The Codex isolation warning now renders inside the clack setup frame. - Docs updated to gpt-5.5 with a note that *-codex ids require API-key auth. * fix: require llm.models.default in status and match codex probe remediation Status reported a project ready when a non-none LLM backend was configured without llm.models.default, but the runtime (resolveModelSlots) hard-requires it, so ingest/scan/memory threw after `ktx status` said the project was usable. buildLlmStatus now fails for any non-none backend missing models.default and no longer invents a fallback model for claude-code/codex. 
Codex probe failures now carry a category-matched fix: a model-access failure steers the user toward llm.models.default instead of the auth/install remediation. runCodexAuthProbe returns the fix and status consumes it; the message stays self-sufficient so setup output is unchanged. Docs: README now lists the codex backend and local Codex auth; ktx-setup.mdx states --llm-model only accepts codex/default or gpt-*/codex-* ids. Repaired four doctor fixtures that configured a backend without models.default (the now-correctly-blocked config) and added coverage for the new behavior.
This commit is contained in:
parent
74c6076b72
commit
494618ab14
41 changed files with 2544 additions and 30 deletions
|
|
@ -611,9 +611,10 @@ function nextLocalJobId(): string {
|
|||
|
||||
/**
 * Builds the multi-line guidance shown when `ktx ingest` cannot run because no
 * usable LLM backend is configured and no agent runner was injected.
 *
 * Only the current wording is emitted: the superseded pre-codex requirement and
 * hint lines are intentionally not repeated alongside the codex-aware ones.
 *
 * @param projectDir - Project directory interpolated into each suggested `ktx setup` command.
 * @returns The requirement line, the hint line, and one setup command per backend, joined with `\n`.
 */
function localIngestLlmProviderGuardMessage(projectDir: string): string {
  // One ready-to-run setup command per suggested backend.
  const setupCommands = [
    ` ktx setup --project-dir ${projectDir} --llm-backend claude-code --no-input`,
    ` ktx setup --project-dir ${projectDir} --llm-backend codex --llm-model gpt-5.5 --no-input`,
    ` ktx setup --project-dir ${projectDir} --llm-backend anthropic --anthropic-api-key-env ANTHROPIC_API_KEY --llm-model claude-sonnet-4-6 --no-input`,
  ];
  return [
    'ktx ingest requires llm.provider.backend: anthropic, vertex, gateway, claude-code, or codex, or an injected agentRunner.',
    'Configure a local Claude Code/Codex session or API-backed LLM, then rerun ingest:',
    ...setupCommands,
  ].join('\n');
}
|
||||
|
|
|
|||
194
packages/cli/src/context/llm/codex-exec-events.ts
Normal file
194
packages/cli/src/context/llm/codex-exec-events.ts
Normal file
|
|
@ -0,0 +1,194 @@
|
|||
import type { LlmTokenUsage, RunLoopStopReason } from './runtime-port.js';
|
||||
|
||||
/** Aggregate view of one Codex exec event stream, as produced by `summarizeCodexExecEvents`. */
export interface CodexExecEventSummary {
  /** Text of the most recent completed `agent_message` item with non-blank text; `''` when none was seen. */
  finalText: string;
  /** `'natural'` by default; derived from the `turn.completed` reason, or `'error'` after a `turn.failed`/`error` event. */
  stopReason: RunLoopStopReason;
  /** Token usage read from the last `turn.completed` event; empty when none carried usage. */
  usage: LlmTokenUsage;
  /** Number of completed agent-action items, or the number of started turns when no action completed. */
  stepCount: number;
  /** Elapsed milliseconds (relative to the start time) recorded at each step boundary. */
  stepBoundariesMs: number[];
  /** Number of `mcp_tool_call` items that were started. */
  toolCallCount: number;
  /** One `name: message` entry per failed `mcp_tool_call` item. */
  toolFailures: string[];
  /** Present only when a `turn.failed` or `error` event was seen. */
  error?: Error;
}
|
||||
|
||||
/** Clock overrides for `summarizeCodexExecEvents`; both default to `Date.now`. */
interface CodexEventParseOptions {
  /** Epoch milliseconds that step boundaries are measured from. */
  startedAt?: number;
  /** Clock used to timestamp each step boundary. */
  now?: () => number;
}
|
||||
|
||||
/**
 * Views an unknown value as a string-keyed record.
 *
 * @returns The same reference when `value` is a non-null object (arrays included), otherwise `undefined`.
 */
function record(value: unknown): Record<string, unknown> | undefined {
  if (!value || typeof value !== 'object') {
    return undefined;
  }
  return value as Record<string, unknown>;
}
|
||||
|
||||
/**
 * Codex thread item types that each represent one discrete agent action, and
 * therefore one loop step. The step budget caps the total across all of them,
 * so built-in `command_execution` (and any file/web action the public Codex
 * surface still exposes) is counted together with our own `mcp_tool_call`
 * items instead of only the MCP ones.
 */
const AGENT_STEP_ITEM_TYPES = new Set([
  'command_execution',
  'mcp_tool_call',
  'file_change',
  'web_search',
]);
|
||||
|
||||
/**
 * Reports whether an event is an `item.completed` event whose item type is one
 * of the agent-action types in `AGENT_STEP_ITEM_TYPES`.
 *
 * @param event - Any value taken from the Codex event stream.
 * @returns `true` only for completed agent-action items; `false` for every other shape.
 */
export function isCompletedAgentStep(event: unknown): boolean {
  const envelope = record(event);
  if (envelope === undefined || envelope.type !== 'item.completed') {
    return false;
  }
  const kind = record(envelope.item)?.type;
  if (typeof kind !== 'string') {
    return false;
  }
  return AGENT_STEP_ITEM_TYPES.has(kind);
}
|
||||
|
||||
/**
 * Accepts a value only when it is a string containing non-whitespace characters.
 *
 * @returns The original (untrimmed) string, or `undefined` for blank strings and non-strings.
 */
function text(value: unknown): string | undefined {
  if (typeof value !== 'string') {
    return undefined;
  }
  return value.trim().length === 0 ? undefined : value;
}
|
||||
|
||||
/**
 * Accepts a value only when it is a finite number.
 *
 * @returns The number itself, or `undefined` for `NaN`, infinities, and non-numbers.
 */
function numberValue(value: unknown): number | undefined {
  // Number.isFinite never coerces, so it is false for every non-number input.
  return Number.isFinite(value) ? (value as number) : undefined;
}
|
||||
|
||||
/**
 * Converts a raw usage payload into an `LlmTokenUsage`.
 *
 * Reads snake_case keys first and camelCase keys as a fallback. When no
 * explicit total is present, the total is the sum of input and output tokens,
 * provided both are known. Fields that cannot be determined are left out.
 *
 * @param value - The `usage` field of an event; any non-object yields `{}`.
 */
function usageFrom(value: unknown): LlmTokenUsage {
  const raw = record(value);
  if (raw === undefined) {
    return {};
  }

  const input = numberValue(raw.input_tokens ?? raw.inputTokens);
  const output = numberValue(raw.output_tokens ?? raw.outputTokens);
  const reportedTotal = numberValue(raw.total_tokens ?? raw.totalTokens);

  let total = reportedTotal;
  if (total === undefined && input !== undefined && output !== undefined) {
    total = input + output;
  }

  return {
    ...(input === undefined ? {} : { inputTokens: input }),
    ...(output === undefined ? {} : { outputTokens: output }),
    ...(total === undefined ? {} : { totalTokens: total }),
  };
}
|
||||
|
||||
/**
 * Maps a turn's reported stop reason onto a loop stop reason.
 *
 * @returns `'budget'` when the lower-cased reason mentions a budget, turn cap,
 * or limit; `'natural'` otherwise (including when no reason string is given).
 */
function stopReasonFrom(value: unknown): RunLoopStopReason {
  const normalized = text(value)?.toLowerCase() ?? '';
  return /(budget|max_turn|max-turn|limit)/.test(normalized) ? 'budget' : 'natural';
}
|
||||
|
||||
/**
 * Extracts a human-readable message from an error-like value.
 *
 * Tries, in order: the `message` of an `Error` instance, a non-blank `message`
 * property on an object, the value itself when it is a non-blank string, and
 * finally the generic `'Codex turn failed'`.
 */
function errorMessageFrom(value: unknown): string {
  if (value instanceof Error) {
    return value.message;
  }
  const fromObject = text(record(value)?.message);
  if (fromObject !== undefined) {
    return fromObject;
  }
  return text(value) ?? 'Codex turn failed';
}
|
||||
|
||||
/**
 * Codex serializes API failures as a JSON envelope inside the event message
 * (e.g. `{"type":"error","status":400,"error":{"message":"…"}}`). This returns
 * the human-readable inner message so callers do not leak raw JSON.
 *
 * @param raw - The message as reported by the event.
 * @returns `error.message` from the envelope if present, else the envelope's
 * own `message`; `raw` unchanged when it is not a JSON object or has neither.
 */
function unwrapCodexApiErrorMessage(raw: string): string {
  const candidate = raw.trim();
  if (candidate.charAt(0) !== '{') {
    return raw;
  }

  let envelope: Record<string, unknown> | undefined;
  try {
    envelope = record(JSON.parse(candidate));
  } catch {
    // Looked like JSON but was not: pass the original through untouched.
    return raw;
  }

  const nested = text(record(envelope?.error)?.message);
  if (nested !== undefined) {
    return nested;
  }
  return text(envelope?.message) ?? raw;
}
|
||||
|
||||
/**
 * Parses one line of the Codex JSONL event stream.
 *
 * @param line - A single JSON document.
 * @returns The parsed value, untyped.
 * @throws Error prefixed with `Codex JSONL event stream was malformed:` when the line is not valid JSON.
 * @internal
 */
export function parseCodexExecEventLine(line: string): unknown {
  let parsed: unknown;
  try {
    parsed = JSON.parse(line);
  } catch (cause) {
    const detail = cause instanceof Error ? cause.message : String(cause);
    throw new Error(`Codex JSONL event stream was malformed: ${detail}`);
  }
  return parsed;
}
|
||||
|
||||
/**
 * Folds a sequence of Codex exec events into a single run summary.
 *
 * Events without a non-blank string `type` are ignored. Handled event types:
 * - `turn.started`: counts a turn.
 * - `item.started` for `mcp_tool_call`: counts a tool call.
 * - `item.completed` for an agent-action item: counts a step, records its
 *   boundary, and records a tool failure for failed MCP calls.
 * - `item.completed` for `agent_message`: updates the final text.
 * - `turn.completed`: captures usage and the stop reason.
 * - `turn.failed` / `error`: marks the run as errored.
 *
 * @param events - Parsed events in stream order.
 * @param options - Optional start time and clock used for step boundaries.
 * @returns The accumulated summary; `error` is present only after a failure event.
 */
export function summarizeCodexExecEvents(
  events: Iterable<unknown>,
  options: CodexEventParseOptions = {},
): CodexExecEventSummary {
  const origin = options.startedAt ?? Date.now();
  const clock = options.now ?? Date.now;
  const elapsedMs = (): number => clock() - origin;

  const stepBoundariesMs: number[] = [];
  const toolFailures: string[] = [];
  let finalText = '';
  let stopReason: RunLoopStopReason = 'natural';
  let usage: LlmTokenUsage = {};
  let error: Error | undefined;
  let turns = 0;
  let completedSteps = 0;
  let toolCallCount = 0;

  for (const event of events) {
    const envelope = record(event);
    const kind = text(envelope?.type);
    if (envelope === undefined || kind === undefined) {
      continue;
    }

    const item = record(envelope.item);
    const itemKind = text(item?.type);

    switch (kind) {
      case 'turn.started':
        turns += 1;
        break;

      case 'item.started':
        if (itemKind === 'mcp_tool_call') {
          toolCallCount += 1;
        }
        break;

      case 'item.completed':
        if (isCompletedAgentStep(event)) {
          completedSteps += 1;
          stepBoundariesMs.push(elapsedMs());
          // Only MCP tool calls fail the loop: a non-zero `command_execution`
          // exit is normal agent exploration, not a runtime error. `status` is
          // the authoritative signal (the SDK always sets it); the SDK also
          // serializes `error: null` on successful calls, so an explicit-null
          // `error` must NOT be read as a failure, only a populated one.
          const failure = item?.error;
          const failed = item?.status === 'failed' || (failure !== undefined && failure !== null);
          if (itemKind === 'mcp_tool_call' && failed) {
            const toolName = text(item?.name) ?? text(item?.tool) ?? text(item?.tool_name) ?? 'unknown';
            toolFailures.push(`${toolName}: ${errorMessageFrom(failure)}`);
          }
        } else if (itemKind === 'agent_message') {
          // A blank message never overwrites text captured earlier.
          finalText = text(item?.text) ?? finalText;
        }
        break;

      case 'turn.completed':
        usage = usageFrom(envelope.usage);
        // A turn with no completed action so far still gets a boundary.
        if (completedSteps === 0) {
          stepBoundariesMs.push(elapsedMs());
        }
        stopReason = stopReasonFrom(envelope.reason ?? envelope.stop_reason ?? envelope.terminal_reason);
        break;

      case 'turn.failed':
      case 'error':
        stopReason = 'error';
        error = new Error(unwrapCodexApiErrorMessage(errorMessageFrom(envelope.error ?? envelope.message)));
        break;

      default:
        break;
    }
  }

  return {
    finalText,
    stopReason,
    usage,
    stepCount: completedSteps === 0 ? turns : completedSteps,
    stepBoundariesMs,
    toolCallCount,
    toolFailures,
    ...(error ? { error } : {}),
  };
}
|
||||
9
packages/cli/src/context/llm/codex-isolation.ts
Normal file
9
packages/cli/src/context/llm/codex-isolation.ts
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
/** Describes what ktx does and does not restrict when running the codex backend. */
export const CODEX_ISOLATION_WARNING =
  'Codex backend isolation is limited by the public Codex SDK/CLI surface: ktx restricts the runtime MCP server to the current ktx tool set, disables Codex web search, asks for a read-only sandbox, and sets approval_policy=never, but Codex may still load user Codex config and built-in command execution or read-only file capabilities.';

/** Remediation text that accompanies {@link CODEX_ISOLATION_WARNING}. */
export const CODEX_ISOLATION_WARNING_FIX =
  'Use llm.provider.backend: claude-code when you need stricter Claude-Code-style runtime tool isolation, or remove host Codex MCP/tool config before running untrusted prompts through the codex backend.';

/**
 * Joins the isolation warning and its remediation into one message.
 *
 * @returns The warning followed by the fix, separated by a single space.
 */
export function formatCodexIsolationWarning(): string {
  return [CODEX_ISOLATION_WARNING, CODEX_ISOLATION_WARNING_FIX].join(' ');
}
|
||||
87
packages/cli/src/context/llm/codex-mcp-runtime-server.ts
Normal file
87
packages/cli/src/context/llm/codex-mcp-runtime-server.ts
Normal file
|
|
@ -0,0 +1,87 @@
|
|||
import { randomBytes } from 'node:crypto';
|
||||
import type { Server } from 'node:http';
|
||||
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
|
||||
import type { KtxMcpServerLike } from '../mcp/types.js';
|
||||
import { runKtxMcpHttpServer, type KtxMcpHttpServerHandle } from '../../mcp-http-server.js';
|
||||
import type { KtxRuntimeToolSet } from './runtime-port.js';
|
||||
import { normalizeKtxRuntimeToolOutput } from './runtime-tools.js';
|
||||
|
||||
/**
 * Input for `createCodexRuntimeMcpServer`.
 * @internal
 */
export interface CreateCodexRuntimeMcpServerInput {
  /** Server to register the tools on; a new `ktx-runtime` `McpServer` is created when omitted. */
  server?: KtxMcpServerLike;
  /** Tools to register, one MCP tool per descriptor. */
  toolSet: KtxRuntimeToolSet;
}

/** Connection details for a started runtime MCP server, plus its shutdown hook. */
export interface CodexRuntimeMcpServerHandle {
  /** Endpoint URL of the server. */
  url: string;
  /** Name of the environment variable that carries the bearer token. */
  bearerTokenEnvVar: 'KTX_CODEX_RUNTIME_MCP_TOKEN';
  /** Bearer token the server was started with. */
  bearerToken: string;
  /** Shuts the server down. */
  close(): Promise<void>;
}

/** Signature of the HTTP server launcher; lets callers substitute their own. */
type RunServer = typeof runKtxMcpHttpServer;

/** Input for `startCodexRuntimeMcpServer`. */
export interface StartCodexRuntimeMcpServerInput {
  projectDir: string;
  /** Tools the started server exposes. */
  toolSet: KtxRuntimeToolSet;
  /** Launcher override; `runKtxMcpHttpServer` is used when omitted. */
  runServer?: RunServer;
}
|
||||
|
||||
/**
 * Registers every tool of the given tool set on an MCP server.
 *
 * Each tool's handler runs the descriptor's `execute`, normalizes the output,
 * and returns the markdown as a single text content part. `structuredContent`
 * is attached only when the normalized structured value is a non-null object.
 *
 * @param input - Tool set to expose and, optionally, an existing server to register on.
 * @returns The server the tools were registered on.
 * @internal
 */
export function createCodexRuntimeMcpServer(input: CreateCodexRuntimeMcpServerInput): KtxMcpServerLike {
  const target =
    input.server ?? (new McpServer({ name: 'ktx-runtime', version: '0.0.0' }) as KtxMcpServerLike);

  Object.values(input.toolSet).forEach((tool) => {
    const registration = {
      description: tool.description,
      inputSchema: tool.inputSchema.shape,
    };
    target.registerTool(tool.name, registration, async (toolInput) => {
      const output = normalizeKtxRuntimeToolOutput(await tool.execute(toolInput));
      const hasStructuredObject =
        output.structured !== undefined && output.structured !== null && typeof output.structured === 'object';
      return {
        content: [{ type: 'text', text: output.markdown }],
        ...(hasStructuredObject ? { structuredContent: output.structured as object } : {}),
      };
    });
  });

  return target;
}
|
||||
|
||||
/**
 * Reads the port an HTTP server is bound to.
 *
 * @param server - The server to inspect.
 * @param fallback - Returned when `address()` yields no address object (e.g. `null` or a string).
 */
function serverPort(server: Server, fallback: number): number {
  const bound = server.address();
  if (typeof bound !== 'object' || !bound) {
    return fallback;
  }
  return bound.port;
}
|
||||
|
||||
/**
 * Starts a loopback HTTP MCP server exposing the given tool set.
 *
 * A fresh random bearer token (32 bytes, hex-encoded) is generated per call.
 * The server is asked to bind `127.0.0.1` on port `0`, and the resulting port
 * is read back from the server handle to build the returned URL.
 *
 * @param input - Project directory, tools to expose, and an optional launcher override.
 * @returns URL, token details, and a `close` function for the started server.
 */
export async function startCodexRuntimeMcpServer(
  input: StartCodexRuntimeMcpServerInput,
): Promise<CodexRuntimeMcpServerHandle> {
  const bearerToken = randomBytes(32).toString('hex');
  const launch = input.runServer ?? runKtxMcpHttpServer;
  const createMcpServer = (): McpServer => createCodexRuntimeMcpServer({ toolSet: input.toolSet }) as McpServer;

  const handle = (await launch({
    projectDir: input.projectDir,
    host: '127.0.0.1',
    port: 0,
    token: bearerToken,
    allowedHosts: ['127.0.0.1', 'localhost'],
    allowedOrigins: [],
    createMcpServer,
  })) as KtxMcpHttpServerHandle;

  const boundPort = serverPort(handle.server, 0);
  const url = `http://127.0.0.1:${boundPort}/mcp`;

  return {
    url,
    bearerTokenEnvVar: 'KTX_CODEX_RUNTIME_MCP_TOKEN',
    bearerToken,
    close: () => handle.close(),
  };
}
|
||||
20
packages/cli/src/context/llm/codex-models.ts
Normal file
20
packages/cli/src/context/llm/codex-models.ts
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
/** Model used when the configured value is one of the generic aliases. */
export const DEFAULT_CODEX_MODEL = 'gpt-5.5';

/**
 * Generic aliases accepted in place of a concrete model id.
 *
 * Held in a `Map` rather than a plain object so a lookup can only ever hit
 * these entries: indexing an object literal with user input also resolves
 * inherited keys such as `constructor` or `__proto__`.
 */
const CODEX_MODEL_ALIASES: ReadonlyMap<string, string> = new Map<string, string>([
  ['codex', DEFAULT_CODEX_MODEL],
  ['default', DEFAULT_CODEX_MODEL],
]);

/** Shape of an explicit model id: `gpt-` or `codex-` followed by letters, digits, `.`, `_` or `-`. */
const EXPLICIT_CODEX_MODEL_ID = /^(?:gpt|codex)-[a-z0-9][a-z0-9._-]*$/i;

/**
 * Resolves a configured model value to the model id passed to Codex.
 *
 * @param model - Configured value; surrounding whitespace is ignored.
 * @returns `DEFAULT_CODEX_MODEL` for the aliases `codex` and `default`,
 * otherwise the trimmed value when it is an explicit `gpt-*` / `codex-*` id.
 * @throws Error when the value is neither an alias nor an explicit model id.
 */
export function resolveCodexModel(model: string): string {
  const normalized = model.trim();

  const alias = CODEX_MODEL_ALIASES.get(normalized);
  if (alias !== undefined) {
    return alias;
  }

  if (EXPLICIT_CODEX_MODEL_ID.test(normalized)) {
    return normalized;
  }

  throw new Error(`Unsupported Codex model "${model}". Use codex, default, or a gpt-* / codex-* model id.`);
}
|
||||
38
packages/cli/src/context/llm/codex-runtime-config.ts
Normal file
38
packages/cli/src/context/llm/codex-runtime-config.ts
Normal file
|
|
@ -0,0 +1,38 @@
|
|||
/** Connection details for the runtime MCP server that Codex should talk to. */
interface CodexRuntimeMcpConfig {
  url: string;
  /** Environment variable name Codex reads the bearer token from. */
  bearerTokenEnvVar: string;
  bearerToken: string;
  /** Names written to the server's `enabled_tools` list. */
  toolNames: string[];
}

/** Input for {@link buildCodexRuntimeConfig}. */
export interface BuildCodexRuntimeConfigInput {
  model: string;
  /** When present, a `ktx` MCP server entry and its token variable are emitted. */
  mcp?: CodexRuntimeMcpConfig;
}

/** Config overrides and environment variables to hand to a Codex run. */
export interface CodexRuntimeConfig {
  configOverrides: Record<string, unknown>;
  env: Record<string, string>;
}

/**
 * Builds the Codex config overrides and environment for one run.
 *
 * History persistence is always set to `none`. When MCP details are given, a
 * single required `ktx` server entry restricted to the listed tools is added,
 * and the bearer token is exported under the configured variable name.
 *
 * @param input - Model plus optional MCP server details.
 * @returns The overrides object and the environment map (empty without MCP).
 */
export function buildCodexRuntimeConfig(input: BuildCodexRuntimeConfigInput): CodexRuntimeConfig {
  const { mcp } = input;
  const env: Record<string, string> = {};
  const configOverrides: Record<string, unknown> = { history: { persistence: 'none' } };

  if (!mcp) {
    return { configOverrides, env };
  }

  const ktx = {
    url: mcp.url,
    bearer_token_env_var: mcp.bearerTokenEnvVar,
    enabled_tools: mcp.toolNames,
    default_tools_approval_mode: 'approve',
    required: true,
  };
  configOverrides.mcp_servers = { ktx };
  env[mcp.bearerTokenEnvVar] = mcp.bearerToken;

  return { configOverrides, env };
}
|
||||
371
packages/cli/src/context/llm/codex-runtime.ts
Normal file
371
packages/cli/src/context/llm/codex-runtime.ts
Normal file
|
|
@ -0,0 +1,371 @@
|
|||
import { z } from 'zod';
|
||||
import { noopLogger, type KtxLogger } from '../core/config.js';
|
||||
import { isCompletedAgentStep, summarizeCodexExecEvents, type CodexExecEventSummary } from './codex-exec-events.js';
|
||||
import {
|
||||
startCodexRuntimeMcpServer,
|
||||
type CodexRuntimeMcpServerHandle,
|
||||
} from './codex-mcp-runtime-server.js';
|
||||
import { resolveCodexModel } from './codex-models.js';
|
||||
import { buildCodexRuntimeConfig } from './codex-runtime-config.js';
|
||||
import { CodexSdkCliRunner, type CodexSdkRunner } from './codex-sdk-runner.js';
|
||||
import type {
|
||||
KtxGenerateObjectInput,
|
||||
KtxGenerateTextInput,
|
||||
KtxLlmRuntimePort,
|
||||
KtxRuntimeToolSet,
|
||||
LlmTokenUsage,
|
||||
RunLoopParams,
|
||||
RunLoopResult,
|
||||
} from './runtime-port.js';
|
||||
|
||||
/** Dependencies of `CodexKtxLlmRuntime`; everything except the project directory and model slots is optional. */
export interface CodexKtxLlmRuntimeDeps {
  projectDir: string;
  /** Model per role; `default` is used for any role without its own entry. */
  modelSlots: { default: string } & Partial<Record<string, string>>;
  /** Runner override; a `CodexSdkCliRunner` is created when omitted. */
  runner?: CodexSdkRunner;
  /** MCP server launcher override; `startCodexRuntimeMcpServer` is used when omitted. */
  startMcpServer?: (input: {
    projectDir: string;
    toolSet: KtxRuntimeToolSet;
  }) => Promise<CodexRuntimeMcpServerHandle>;
  /** Logger override; `noopLogger` is used when omitted. */
  logger?: KtxLogger;
}
|
||||
|
||||
/**
 * Picks the model configured for a role, falling back to the `default` slot,
 * and resolves it through `resolveCodexModel`.
 */
function modelForRole(modelSlots: CodexKtxLlmRuntimeDeps['modelSlots'], role: string): string {
  const configured = modelSlots[role];
  return resolveCodexModel(configured ?? modelSlots.default);
}
|
||||
|
||||
/**
 * Combines the optional system prompt and the user prompt into one string.
 *
 * @returns The non-empty parts, system first, separated by a blank line.
 */
function promptWithSystem(system: string | undefined, prompt: string): string {
  const parts: string[] = [];
  if (system) {
    parts.push(system);
  }
  if (prompt) {
    parts.push(prompt);
  }
  return parts.join('\n\n');
}
|
||||
|
||||
/** Options controlling how `collectEvents` reports steps and enforces the budget. */
interface CollectCodexEventsOptions {
  /** Maximum number of steps; reaching it on an agent-action step stops collection. */
  stepBudget?: number;
  /** Aborted when the step budget is reached. */
  abortController?: AbortController;
  /** Awaited once per counted step with the 1-based step index. */
  onStep?: (stepIndex: number) => void | Promise<void>;
}

/** Outcome of draining a Codex event stream. */
interface CollectCodexEventsResult {
  /** Every event received, in order. */
  events: unknown[];
  /** `true` when collection stopped because the step budget was reached. */
  budgetExceeded: boolean;
  /** Present when iterating the stream threw. */
  streamError?: Error;
}
|
||||
|
||||
/**
 * Views an unknown event as a string-keyed record.
 *
 * @returns The same reference for any non-null object, otherwise `undefined`.
 */
function eventRecord(value: unknown): Record<string, unknown> | undefined {
  if (!value || typeof value !== 'object') {
    return undefined;
  }
  return value as Record<string, unknown>;
}
|
||||
|
||||
/** Reports whether an event is an object whose `type` is exactly `'turn.completed'`. */
function isTurnCompleted(event: unknown): boolean {
  const envelope = eventRecord(event);
  return envelope !== undefined && envelope.type === 'turn.completed';
}
|
||||
|
||||
/**
 * Drains the Codex stream once, reporting a step as each agent action
 * completes so callers see live progress and the step budget is enforced
 * mid-run. Every completed agent-action item counts (see
 * {@link isCompletedAgentStep}), so built-in `command_execution` steps consume
 * the budget the same way `mcp_tool_call`s do. A turn that produced no actions
 * still counts as one step, matching the metrics summary and the AI SDK backend.
 *
 * @param events - The event stream to consume.
 * @param options - Optional step budget, abort controller, and per-step callback.
 * @returns All events received, whether the budget stopped the run, and any error thrown by the stream.
 */
async function collectEvents(
  events: AsyncIterable<unknown>,
  options: CollectCodexEventsOptions = {},
): Promise<CollectCodexEventsResult> {
  const { stepBudget, abortController, onStep } = options;
  const received: unknown[] = [];
  let stepsSoFar = 0;
  let anyActionStep = false;
  let budgetExceeded = false;
  let streamError: Error | undefined;

  // The SDK yields every stdout event, then throws on a non-zero codex exec
  // exit. Catching that throw keeps the events already received (which carry
  // the real `turn.failed`/`error` reason) available for the summary; the
  // masked exit message only serves as a fallback when no error event arrived.
  try {
    for await (const event of events) {
      received.push(event);

      const actionStep = isCompletedAgentStep(event);
      // A bare completed turn counts as a step only if no agent action has
      // been seen so far.
      const bareTurnStep = !actionStep && !anyActionStep && isTurnCompleted(event);
      if (!actionStep && !bareTurnStep) {
        continue;
      }
      if (actionStep) {
        anyActionStep = true;
      }

      stepsSoFar += 1;
      await onStep?.(stepsSoFar);

      // A completed turn is terminal, so only action steps can trip the budget.
      if (!actionStep || stepBudget === undefined || stepsSoFar < stepBudget) {
        continue;
      }
      budgetExceeded = true;
      abortController?.abort();
      break;
    }
  } catch (thrown) {
    streamError = thrown instanceof Error ? thrown : new Error(String(thrown));
  }

  return { events: received, budgetExceeded, ...(streamError ? { streamError } : {}) };
}
|
||||
|
||||
/**
 * Builds the metrics payload for a finished call.
 *
 * @param summary - Event summary providing the token usage.
 * @param startedAt - Epoch milliseconds at which the call began.
 * @returns Wall-clock duration up to now and the summary's usage.
 */
function metrics(summary: CodexExecEventSummary, startedAt: number): { totalMs: number; usage: LlmTokenUsage } {
  const totalMs = Date.now() - startedAt;
  return { totalMs, usage: summary.usage };
}
|
||||
|
||||
/**
 * Chooses the error that best explains a failed run, if any.
 *
 * Precedence: the error from a `turn.failed`/`error` event (it carries the
 * real reason), then recorded tool failures, then the stream error. The stream
 * error is only a fallback for cases where no event explained the failure
 * (e.g. spawn failure or auth before a turn).
 *
 * @returns The selected error, or `undefined` when nothing failed.
 */
function summaryError(summary: CodexExecEventSummary, streamError?: Error): Error | undefined {
  const { error, toolFailures } = summary;
  if (error) {
    return error;
  }
  if (toolFailures.length === 0) {
    return streamError;
  }
  return new Error(`Codex runtime tool call failed: ${toolFailures.join('; ')}`);
}
|
||||
|
||||
/**
 * Returns the final agent text of a run, or throws if the run did not succeed.
 *
 * @throws The error chosen by `summaryError` when the run failed.
 * @throws Error `Codex completed without an agent message` when the final text is blank.
 */
function assertSuccessfulText(summary: CodexExecEventSummary, streamError?: Error): string {
  const failure = summaryError(summary, streamError);
  if (failure !== undefined) {
    throw failure;
  }
  const { finalText } = summary;
  if (finalText.trim().length === 0) {
    throw new Error('Codex completed without an agent message');
  }
  return finalText;
}
|
||||
|
||||
/**
 * Parses the agent's final text as JSON and validates it against a schema.
 *
 * @param schema - Schema the decoded value must satisfy.
 * @param text - Raw final text returned by the agent.
 * @returns The validated value.
 * @throws Error prefixed with `Codex structured output failed validation:` when
 * the text is not valid JSON or does not match the schema.
 */
function parseStructuredOutput<TOutput, TSchema extends z.ZodType<TOutput>>(schema: TSchema, text: string): TOutput {
  try {
    const decoded: unknown = JSON.parse(text);
    return schema.parse(decoded);
  } catch (failure) {
    const reason = failure instanceof Error ? failure.message : String(failure);
    throw new Error(`Codex structured output failed validation: ${reason}`);
  }
}
|
||||
|
||||
/**
 * Starts a runtime MCP server for the given tools, if there are any.
 *
 * @param input - Project directory, optional tool set, and optional launcher override.
 * @returns The started server's handle, or `undefined` when the tool set is missing or empty.
 */
async function mcpForTools(input: {
  projectDir: string;
  toolSet?: KtxRuntimeToolSet;
  startMcpServer: CodexKtxLlmRuntimeDeps['startMcpServer'];
}): Promise<CodexRuntimeMcpServerHandle | undefined> {
  const { projectDir, toolSet } = input;
  // No tools means there is nothing to serve, so no server is started.
  if (!toolSet || Object.keys(toolSet).length === 0) {
    return undefined;
  }
  const launch = input.startMcpServer ?? startCodexRuntimeMcpServer;
  return launch({ projectDir, toolSet });
}
|
||||
|
||||
/**
 * Lists the `name` of every descriptor in a tool set.
 *
 * @returns The names in `Object.values` order; an empty array when no tool set is given.
 */
function runtimeToolNames(toolSet: KtxRuntimeToolSet | undefined): string[] {
  const names: string[] = [];
  for (const descriptor of Object.values(toolSet ?? {})) {
    names.push(descriptor.name);
  }
  return names;
}
|
||||
|
||||
/**
 * Builds the Codex runtime config for one call: MCP details are included only
 * when a runtime MCP server was started for it.
 */
function runtimeConfigFor(
  model: string,
  mcp: CodexRuntimeMcpServerHandle | undefined,
  toolSet: KtxRuntimeToolSet | undefined,
) {
  if (!mcp) {
    return buildCodexRuntimeConfig({ model });
  }
  return buildCodexRuntimeConfig({
    model,
    mcp: {
      url: mcp.url,
      bearerTokenEnvVar: mcp.bearerTokenEnvVar,
      bearerToken: mcp.bearerToken,
      toolNames: runtimeToolNames(toolSet),
    },
  });
}

/**
 * `KtxLlmRuntimePort` implementation backed by Codex.
 *
 * Each call resolves the model for the requested role, starts a runtime MCP
 * server when tools are supplied, streams one Codex run, summarizes its
 * events, and always closes the MCP server afterwards.
 */
export class CodexKtxLlmRuntime implements KtxLlmRuntimePort {
  private readonly runner: CodexSdkRunner;
  private readonly logger: KtxLogger;

  /** @param deps - Project directory and model slots, plus optional runner, MCP launcher, and logger overrides. */
  constructor(private readonly deps: CodexKtxLlmRuntimeDeps) {
    this.runner = deps.runner ?? new CodexSdkCliRunner();
    this.logger = deps.logger ?? noopLogger;
  }

  /**
   * Runs a single prompt and returns the agent's final text.
   *
   * `onMetrics` is invoked before the result is checked, so it also fires for failed runs.
   *
   * @throws The run's error, or an error when Codex finished without an agent message.
   */
  async generateText(input: KtxGenerateTextInput): Promise<string> {
    const startedAt = Date.now();
    const model = modelForRole(this.deps.modelSlots, input.role);
    const mcp = await mcpForTools({
      projectDir: this.deps.projectDir,
      toolSet: input.tools,
      startMcpServer: this.deps.startMcpServer,
    });
    try {
      const { configOverrides, env } = runtimeConfigFor(model, mcp, input.tools);
      const stream = await this.runner.runStreamed({
        projectDir: this.deps.projectDir,
        model,
        prompt: promptWithSystem(input.system, input.prompt),
        configOverrides,
        env,
      });
      const { events, streamError } = await collectEvents(stream);
      const summary = summarizeCodexExecEvents(events, { startedAt });
      input.onMetrics?.(metrics(summary, startedAt));
      return assertSuccessfulText(summary, streamError);
    } finally {
      await mcp?.close();
    }
  }

  /**
   * Runs a single prompt with a JSON output schema and returns the validated object.
   *
   * The schema is sent to Codex as draft-7 JSON Schema, and the final text is
   * then parsed and validated locally against the same schema.
   *
   * @throws The run's error, or a validation error when the output does not match the schema.
   */
  async generateObject<TOutput, TSchema extends z.ZodType<TOutput>>(
    input: KtxGenerateObjectInput<TOutput, TSchema>,
  ): Promise<TOutput> {
    const startedAt = Date.now();
    const model = modelForRole(this.deps.modelSlots, input.role);
    const mcp = await mcpForTools({
      projectDir: this.deps.projectDir,
      toolSet: input.tools,
      startMcpServer: this.deps.startMcpServer,
    });
    try {
      const { configOverrides, env } = runtimeConfigFor(model, mcp, input.tools);
      const stream = await this.runner.runStreamed({
        projectDir: this.deps.projectDir,
        model,
        prompt: promptWithSystem(input.system, input.prompt),
        configOverrides,
        env,
        outputSchema: z.toJSONSchema(input.schema, { target: 'draft-7' }) as Record<string, unknown>,
      });
      const { events, streamError } = await collectEvents(stream);
      const summary = summarizeCodexExecEvents(events, { startedAt });
      input.onMetrics?.(metrics(summary, startedAt));
      return parseStructuredOutput(input.schema, assertSuccessfulText(summary, streamError));
    } finally {
      await mcp?.close();
    }
  }

  /**
   * Runs an agent loop under a step budget, reporting each step as it completes.
   *
   * Failures after model resolution are returned as an `'error'` result with
   * zeroed metrics instead of being thrown. An exception thrown by
   * `onStepFinish` is logged and ignored.
   *
   * @returns `'budget'` when the step budget stopped the run, `'error'` when the
   * run failed, otherwise the stop reason derived from the events.
   */
  async runAgentLoop(params: RunLoopParams): Promise<RunLoopResult> {
    const startedAt = Date.now();
    const model = modelForRole(this.deps.modelSlots, params.modelRole);
    let mcp: CodexRuntimeMcpServerHandle | undefined;
    try {
      mcp = await mcpForTools({
        projectDir: this.deps.projectDir,
        toolSet: params.toolSet,
        startMcpServer: this.deps.startMcpServer,
      });
      const { configOverrides, env } = runtimeConfigFor(model, mcp, params.toolSet);
      const abortController = new AbortController();

      // A misbehaving progress callback must not take the loop down with it.
      const onStep = async (stepIndex: number): Promise<void> => {
        try {
          await params.onStepFinish?.({ stepIndex, stepBudget: params.stepBudget });
        } catch (error) {
          this.logger.warn(
            `[codex-runner] onStepFinish callback threw; ignoring: ${error instanceof Error ? error.message : String(error)}`,
          );
        }
      };

      const stream = await this.runner.runStreamed({
        projectDir: this.deps.projectDir,
        model,
        prompt: promptWithSystem(params.systemPrompt, params.userPrompt),
        configOverrides,
        env,
        signal: abortController.signal,
      });
      const collected = await collectEvents(stream, { stepBudget: params.stepBudget, abortController, onStep });
      const summary = summarizeCodexExecEvents(collected.events, { startedAt });
      const error = summaryError(summary, collected.streamError);

      const loopMetrics = {
        totalMs: Date.now() - startedAt,
        usage: summary.usage,
        stepCount: summary.stepCount,
        stepBoundariesMs: summary.stepBoundariesMs,
      };
      // Hitting the budget takes precedence over any error seen along the way.
      if (collected.budgetExceeded) {
        return { stopReason: 'budget', metrics: loopMetrics };
      }
      if (error) {
        return { stopReason: 'error', error, metrics: loopMetrics };
      }
      return { stopReason: summary.stopReason, metrics: loopMetrics };
    } catch (thrown) {
      const failure = thrown instanceof Error ? thrown : new Error(String(thrown));
      return {
        stopReason: 'error',
        error: failure,
        metrics: { totalMs: Date.now() - startedAt, usage: {}, stepCount: 0, stepBoundariesMs: [] },
      };
    } finally {
      await mcp?.close();
    }
  }
}
|
||||
|
||||
// A rejected model is not an auth failure: Codex authenticated, connected, and
// the API refused the model id. These markers come from the API error envelope
// (e.g. "model is not supported", "invalid_request_error").
const MODEL_UNAVAILABLE_MARKERS =
  /\bnot supported\b|\bnot available\b|\bdoes not exist\b|invalid_request_error|\bunknown model\b|\bunsupported model\b/i;

// A rejected credential can arrive in the same "invalid_request_error" envelope
// as a rejected model (the OpenAI API reports a bad key as type
// "invalid_request_error" with code "invalid_api_key"). These markers are
// checked first so a bad key is never described as "authenticated".
const CREDENTIAL_REJECTED_MARKERS = /invalid_api_key|\bincorrect api key\b|\bunauthorized\b/i;

/**
 * Turns a raw Codex probe error into a user-facing message plus a suggested fix.
 *
 * @param model - The model id that was probed; quoted in the model-unavailable message.
 * @param message - The raw error text; appended verbatim after "Details:".
 * @returns `message` (explanation + fix + details) and `fix` (the remediation alone).
 *   Errors that look like a model rejection, and not like a credential rejection,
 *   get the "model is not available" wording; everything else gets the
 *   authentication wording.
 */
function describeCodexProbeFailure(model: string, message: string): { message: string; fix: string } {
  // Credential markers win: the generic "invalid_request_error" envelope alone
  // does not prove that authentication succeeded.
  const credentialRejected = CREDENTIAL_REJECTED_MARKERS.test(message);

  if (!credentialRejected && MODEL_UNAVAILABLE_MARKERS.test(message)) {
    const fix = `Run \`codex\` to see the models your account supports, then set llm.models.default in ktx.yaml (or rerun \`ktx setup\`).`;
    return {
      message: `Codex is authenticated, but the configured model "${model}" is not available for this Codex account. ${fix} Details: ${message}`,
      fix,
    };
  }

  const fix = `Authenticate Codex locally with the Codex CLI, verify the Codex CLI is installed, then rerun setup or \`ktx status\`.`;
  return {
    message: `Codex authentication is not usable. ${fix} Details: ${message}`,
    fix,
  };
}
|
||||
|
||||
/**
 * Checks whether the Codex backend is usable by running one tiny text generation.
 *
 * @param input.projectDir - Project directory handed to the Codex runtime.
 * @param input.model - Configured model name; resolved through `resolveCodexModel` first.
 * @param input.runner - Optional runner forwarded to the runtime (omitted when not given).
 * @returns `{ ok: true }` when the generation call completes; otherwise `{ ok: false }`
 *   with a message and a suggested fix. A model name that fails to resolve is
 *   reported without starting Codex at all.
 */
export async function runCodexAuthProbe(input: {
  projectDir: string;
  model: string;
  runner?: CodexSdkRunner;
}): Promise<{ ok: true } | { ok: false; message: string; fix: string }> {
  const { projectDir, runner } = input;

  // Step 1: reject an unresolvable model name before constructing a runtime.
  let resolvedModel: string;
  try {
    resolvedModel = resolveCodexModel(input.model);
  } catch (cause) {
    const reason = cause instanceof Error ? cause.message : String(cause);
    return {
      ok: false,
      message: reason,
      fix: 'Set llm.models.default in ktx.yaml to a supported codex model (codex, default, or a gpt-* / codex-* id), or rerun `ktx setup`.',
    };
  }

  // Step 2: build the runtime. This sits outside any try block, so a
  // constructor failure propagates to the caller.
  const probeRuntime = new CodexKtxLlmRuntime({
    projectDir,
    modelSlots: { default: resolvedModel },
    ...(runner ? { runner } : {}),
  });

  // Step 3: run the probe prompt and translate any failure into message + fix.
  try {
    await probeRuntime.generateText({ role: 'default', prompt: 'Reply with exactly: ok' });
  } catch (cause) {
    const detail = cause instanceof Error ? cause.message : String(cause);
    const described = describeCodexProbeFailure(resolvedModel, detail);
    return { ok: false, ...described };
  }
  return { ok: true };
}
|
||||
96
packages/cli/src/context/llm/codex-sdk-runner.ts
Normal file
96
packages/cli/src/context/llm/codex-sdk-runner.ts
Normal file
|
|
@ -0,0 +1,96 @@
|
|||
import { Codex, type CodexOptions, type ThreadOptions, type TurnOptions } from '@openai/codex-sdk';
|
||||
|
||||
/**
 * Everything a {@link CodexSdkRunner} needs to run one streamed Codex turn.
 * The per-field notes describe how `CodexSdkCliRunner` consumes each value.
 */
export interface CodexSdkRunnerInput {
  /** Used as the thread's working directory. */
  projectDir: string;
  /** Model id passed to the thread. */
  model: string;
  /** Prompt text sent as the turn input. */
  prompt: string;
  /** Optional values passed to the Codex client as its `config`. */
  configOverrides?: Record<string, unknown>;
  /** Optional variables layered over the allowlisted base environment. */
  env?: Record<string, string>;
  /** Optional output schema forwarded as a turn option. */
  outputSchema?: Record<string, unknown>;
  /** Optional abort signal forwarded as a turn option. */
  signal?: AbortSignal;
}
|
||||
|
||||
/** Abstraction over "run one Codex turn and stream its events". */
export interface CodexSdkRunner {
  /**
   * Starts a run for the given input.
   *
   * @returns A promise for an async iterable of events; the events are untyped
   *   (`unknown`) at this boundary.
   */
  runStreamed(input: CodexSdkRunnerInput): Promise<AsyncIterable<unknown>>;
}
|
||||
|
||||
/** The one thread method this file calls: run a turn and expose its event stream. */
type CodexThread = {
  runStreamed(input: string, turnOptions?: TurnOptions): Promise<{ events: AsyncIterable<unknown> }>;
};

/** The one client method this file calls: start a thread with the given options. */
type CodexClient = {
  startThread(options: ThreadOptions): CodexThread;
};

/** Constructor signature the imported `Codex` class is viewed through in this file. */
type CodexConstructor = new (options?: CodexOptions) => CodexClient;
|
||||
|
||||
/** Construction-time options for `CodexSdkCliRunner`. */
export interface CodexSdkCliRunnerOptions {
  /** Base environment the allowlist is applied to; the runner uses `process.env` when omitted. */
  envBase?: NodeJS.ProcessEnv;
  /** When set to a non-empty string, passed to the Codex client as `codexPathOverride`. */
  codexPathOverride?: string;
}
|
||||
|
||||
/**
 * The only variable names copied from the base environment into the
 * environment handed to the Codex client. Anything not listed is dropped.
 *
 * Proxy variables are listed in both upper- and lower-case: the lower-case
 * spellings are the conventional ones on Unix, and dropping them would
 * silently remove the user's proxy configuration.
 */
const CODEX_ENV_ALLOWLIST = new Set([
  'HOME',
  'USERPROFILE',
  'APPDATA',
  'LOCALAPPDATA',
  'XDG_CONFIG_HOME',
  'CODEX_HOME',
  'CODEX_API_KEY',
  'OPENAI_API_KEY',
  'PATH',
  'Path',
  'SYSTEMROOT',
  'COMSPEC',
  'TMPDIR',
  'TMP',
  'TEMP',
  'SSL_CERT_FILE',
  'SSL_CERT_DIR',
  'NODE_EXTRA_CA_CERTS',
  'HTTPS_PROXY',
  'https_proxy',
  'HTTP_PROXY',
  'http_proxy',
  'ALL_PROXY',
  'all_proxy',
  'NO_PROXY',
  'no_proxy',
]);

/**
 * Builds the environment for the Codex client.
 *
 * @param baseEnv - Source environment; only allowlisted names with string values are copied.
 * @param overrides - Optional extra variables; these win over copied values and are
 *   not filtered by the allowlist.
 * @returns A new plain object; neither argument is mutated.
 */
function buildCodexSdkEnv(
  baseEnv: NodeJS.ProcessEnv,
  overrides: Record<string, string> | undefined,
): Record<string, string> {
  const env: Record<string, string> = {};
  for (const key of CODEX_ENV_ALLOWLIST) {
    const value = baseEnv[key];
    // Unset variables read as undefined; skip them rather than copying a non-string.
    if (typeof value === 'string') {
      env[key] = value;
    }
  }
  return { ...env, ...(overrides ?? {}) };
}
|
||||
|
||||
/**
 * {@link CodexSdkRunner} backed by the `Codex` class from `@openai/codex-sdk`.
 * A new client and thread are created on every call.
 */
export class CodexSdkCliRunner implements CodexSdkRunner {
  constructor(private readonly options: CodexSdkCliRunnerOptions = {}) {}

  /**
   * Runs a single streamed turn and returns its event stream.
   *
   * @param input - Prompt, model, working directory and optional overrides for the run.
   * @returns The `events` iterable of the streamed turn.
   */
  async runStreamed(input: CodexSdkRunnerInput): Promise<AsyncIterable<unknown>> {
    const { envBase, codexPathOverride } = this.options;
    const { configOverrides, outputSchema, signal } = input;

    // View the SDK class through the narrow constructor shape declared in this file.
    const CodexCtor = Codex as CodexConstructor;
    const client = new CodexCtor({
      ...(configOverrides ? { config: configOverrides as CodexOptions['config'] } : {}),
      env: buildCodexSdkEnv(envBase ?? process.env, input.env),
      ...(codexPathOverride ? { codexPathOverride } : {}),
    });

    // Thread settings are fixed here; only the directory and model come from the caller.
    const thread = client.startThread({
      workingDirectory: input.projectDir,
      skipGitRepoCheck: true,
      model: input.model,
      sandboxMode: 'read-only',
      webSearchMode: 'disabled',
      approvalPolicy: 'never',
    });

    // Pass turn options only when at least one is set; otherwise pass undefined.
    const perTurn: TurnOptions = {
      ...(outputSchema ? { outputSchema } : {}),
      ...(signal ? { signal } : {}),
    };
    const hasPerTurnOptions = Object.keys(perTurn).length > 0;

    const { events } = await thread.runStreamed(input.prompt, hasPerTurnOptions ? perTurn : undefined);
    return events;
  }
}
|
||||
|
|
@ -5,6 +5,7 @@ import { resolveKtxConfigReference } from '../core/config-reference.js';
|
|||
import type { KtxProjectEmbeddingConfig, KtxProjectLlmConfig } from '../project/config.js';
|
||||
import { AiSdkKtxLlmRuntime } from './ai-sdk-runtime.js';
|
||||
import { ClaudeCodeKtxLlmRuntime } from './claude-code-runtime.js';
|
||||
import { CodexKtxLlmRuntime } from './codex-runtime.js';
|
||||
import type { KtxLlmRuntimePort } from './runtime-port.js';
|
||||
|
||||
interface LocalConfigDeps {
|
||||
|
|
@ -13,6 +14,7 @@ interface LocalConfigDeps {
|
|||
createKtxLlmProvider?: typeof createKtxLlmProvider;
|
||||
createKtxEmbeddingProvider?: typeof createKtxEmbeddingProvider;
|
||||
createClaudeCodeRuntime?: (deps: ConstructorParameters<typeof ClaudeCodeKtxLlmRuntime>[0]) => KtxLlmRuntimePort;
|
||||
createCodexRuntime?: (deps: ConstructorParameters<typeof CodexKtxLlmRuntime>[0]) => KtxLlmRuntimePort;
|
||||
createAiSdkRuntime?: (deps: { llmProvider: KtxLlmProvider }) => KtxLlmRuntimePort;
|
||||
}
|
||||
|
||||
|
|
@ -104,7 +106,7 @@ export function createLocalKtxLlmProviderFromConfig(
|
|||
deps: LocalConfigDeps = {},
|
||||
): KtxLlmProvider | null {
|
||||
const resolved = resolveLocalKtxLlmConfig(config, deps.env ?? process.env);
|
||||
if (!resolved || resolved.backend === 'claude-code') {
|
||||
if (!resolved || resolved.backend === 'claude-code' || resolved.backend === 'codex') {
|
||||
return null;
|
||||
}
|
||||
return (deps.createKtxLlmProvider ?? createKtxLlmProvider)(resolved);
|
||||
|
|
@ -129,6 +131,16 @@ export function createLocalKtxLlmRuntimeFromConfig(
|
|||
env: deps.env,
|
||||
});
|
||||
}
|
||||
if (resolved.backend === 'codex') {
|
||||
const projectDir = deps.projectDir;
|
||||
if (!projectDir) {
|
||||
throw new Error('projectDir is required when creating the codex LLM runtime');
|
||||
}
|
||||
return (deps.createCodexRuntime ?? ((runtimeDeps) => new CodexKtxLlmRuntime(runtimeDeps)))({
|
||||
projectDir,
|
||||
modelSlots: resolved.modelSlots,
|
||||
});
|
||||
}
|
||||
const llmProvider = (deps.createKtxLlmProvider ?? createKtxLlmProvider)(resolved);
|
||||
return (deps.createAiSdkRuntime ?? ((runtimeDeps) => new AiSdkKtxLlmRuntime(runtimeDeps)))({ llmProvider });
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ import YAML from 'yaml';
|
|||
import * as z from 'zod';
|
||||
import { connectionConfigSchema } from './driver-schemas.js';
|
||||
|
||||
const KTX_LLM_BACKENDS = ['none', 'anthropic', 'vertex', 'gateway', 'claude-code'] as const;
|
||||
const KTX_LLM_BACKENDS = ['none', 'anthropic', 'vertex', 'gateway', 'claude-code', 'codex'] as const;
|
||||
const KTX_EMBEDDING_BACKENDS = ['none', 'openai', 'sentence-transformers'] as const;
|
||||
const KTX_PROMPT_CACHE_TTLS = ['5m', '1h'] as const;
|
||||
const KTX_ENRICHMENT_MODES = ['none', 'deterministic', 'llm'] as const;
|
||||
|
|
@ -38,7 +38,7 @@ const llmProviderSchema = z
|
|||
.enum(KTX_LLM_BACKENDS)
|
||||
.default('none')
|
||||
.describe(
|
||||
'LLM provider backend. "none" disables LLM features; "anthropic" / "vertex" / "gateway" require the matching nested credentials block; "claude-code" uses the local Claude Code session.',
|
||||
'LLM provider backend. "none" disables LLM features; "anthropic" / "vertex" / "gateway" require the matching nested credentials block; "claude-code" uses the local Claude Code session; "codex" uses the local Codex session.',
|
||||
),
|
||||
vertex: vertexProviderSchema.optional().describe('Vertex AI credentials, used when backend is "vertex".'),
|
||||
anthropic: apiCredentialsSchema.optional().describe('Anthropic API credentials, used when backend is "anthropic".'),
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue