ktx/scripts/codex-backend-live-smoke.mjs

import { execFile } from 'node:child_process';
import { mkdtemp, rm } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { dirname, join, resolve } from 'node:path';
import { fileURLToPath, pathToFileURL } from 'node:url';
import { promisify } from 'node:util';

// Promise-returning form of execFile so child commands can be awaited.
const execFileAsync = promisify(execFile);
// Directory containing this script file.
const SCRIPT_DIR = dirname(fileURLToPath(import.meta.url));
// Parent of the script directory; default cwd for child commands and base for the dist paths below.
const ROOT_DIR = resolve(SCRIPT_DIR, '..');
// Returned by codexBackendSmokeOptIn when the live smoke has not been opted into.
const OPT_IN_MESSAGE =
  'Set KTX_RUN_CODEX_BACKEND_SMOKE=1 or pass --force to run the Codex backend live smoke.';

/**
 * Decide whether the Codex backend live smoke should run.
 *
 * The smoke is opt-in: it runs only when `KTX_RUN_CODEX_BACKEND_SMOKE` is
 * exactly `'1'` or when `--force` appears among the arguments.
 *
 * @param {Record<string, string | undefined>} [env] - Environment to inspect; defaults to `process.env`.
 * @param {string[]} [args] - Command-line arguments; defaults to `process.argv.slice(2)`.
 * @returns {{ run: true } | { run: false, message: string }} `run: true` when opted in,
 *   otherwise `run: false` plus the opt-in hint to show the user.
 */
export function codexBackendSmokeOptIn(env = process.env, args = process.argv.slice(2)) {
  const enabledByEnv = env.KTX_RUN_CODEX_BACKEND_SMOKE === '1';
  const optedIn = enabledByEnv || args.includes('--force');
  return optedIn ? { run: true } : { run: false, message: OPT_IN_MESSAGE };
}

/**
 * Run a command, echo its output to this process's stdout/stderr, and report
 * the outcome as a plain result object instead of rejecting on failure.
 *
 * @param {string} command - Executable to run.
 * @param {string[]} args - Arguments passed to the executable (no shell is involved).
 * @param {{ cwd?: string, env?: Record<string, string>, timeoutMs?: number }} [options]
 *   `cwd` defaults to ROOT_DIR, `env` is merged over `process.env`, and
 *   `timeoutMs` defaults to 300 000 ms.
 * @returns {Promise<{ code: number, stdout: string, stderr: string }>} `code` is 0 on
 *   success; on failure it is the numeric exit code when there is one, otherwise 1.
 */
async function run(command, args, options = {}) {
  process.stdout.write(`$ ${command} ${args.join(' ')}\n`);
  try {
    const result = await execFileAsync(command, args, {
      cwd: options.cwd ?? ROOT_DIR,
      env: { ...process.env, ...(options.env ?? {}) },
      encoding: 'utf8',
      maxBuffer: 1024 * 1024 * 20,
      timeout: options.timeoutMs ?? 300_000,
    });
    if (result.stdout) {
      process.stdout.write(result.stdout);
    }
    if (result.stderr) {
      process.stderr.write(result.stderr);
    }
    return { code: 0, stdout: result.stdout, stderr: result.stderr };
  } catch (error) {
    const stdout = typeof error.stdout === 'string' ? error.stdout : '';
    const capturedStderr = typeof error.stderr === 'string' ? error.stderr : '';
    // The promisified execFile attaches stderr as '' (a string) when the child
    // wrote nothing: a spawn failure such as ENOENT, a timeout kill, or a silent
    // non-zero exit. Fall back to the error message so the reason is not lost.
    const stderr = capturedStderr !== '' ? capturedStderr : String(error.message ?? error);
    if (stdout) {
      process.stdout.write(stdout);
    }
    if (stderr) {
      // Keep the echoed diagnostics newline-terminated so later output starts on its own line.
      process.stderr.write(stderr.endsWith('\n') ? stderr : `${stderr}\n`);
    }
    return {
      // error.code is a string (e.g. 'ENOENT') for spawn failures and is not a
      // number when the child was killed by a signal; report those as 1.
      code: typeof error.code === 'number' ? error.code : 1,
      stdout,
      stderr,
    };
  }
}

/**
 * Throw unless a command result reports exit code 0.
 *
 * @param {string} label - Human-readable name of the step, used in the error message.
 * @param {{ code: number, stdout: string, stderr: string }} result - Outcome of the command.
 * @throws {Error} When `result.code` is not 0; the message carries the code and both captured streams.
 */
function requireSuccess(label, result) {
  const { code, stdout, stderr } = result;
  if (code === 0) {
    return;
  }
  throw new Error(`${label} failed with code ${code}\nstdout:\n${stdout}\nstderr:\n${stderr}`);
}

/**
 * Run the built CLI's `setup` command against a project directory with the
 * codex LLM backend and verify that it reports the LLM as ready.
 *
 * The model id is used both for the `--llm-model` flag and for the expected
 * readiness line, so the two stay in agreement.
 *
 * @param {string} projectDir - Directory passed to `--project-dir`.
 * @param {string} [model] - Model id to configure; defaults to `'gpt-5.3-codex'`.
 * @returns {Promise<void>}
 * @throws {Error} When the command exits non-zero, or when its stdout does not
 *   contain the `LLM ready: yes (codex, <model>)` line.
 */
async function runSetupSmoke(projectDir, model = 'gpt-5.3-codex') {
  const result = await run(
    'node',
    [
      join(ROOT_DIR, 'packages/cli/dist/bin.js'),
      'setup',
      '--project-dir',
      projectDir,
      '--llm-backend',
      'codex',
      '--llm-model',
      model,
      '--no-input',
      '--yes',
      '--skip-databases',
      '--skip-sources',
      '--skip-agents',
    ],
    // Allow up to 10 minutes instead of run()'s shorter default.
    { timeoutMs: 600_000 },
  );
  requireSuccess('ktx setup codex backend', result);
  const readinessLine = `LLM ready: yes (codex, ${model})`;
  if (!result.stdout.includes(readinessLine)) {
    throw new Error(`setup did not report Codex LLM readiness\nstdout:\n${result.stdout}`);
  }
}

/**
 * Exercise the built Codex runtime directly: one plain text generation and one
 * agent loop that must call a single tool.
 *
 * The runtime class and zod are loaded dynamically from files under ROOT_DIR.
 *
 * @param {string} projectDir - Project directory handed to the runtime.
 * @returns {Promise<void>}
 * @throws {Error} When the text reply is not exactly `ktx_codex_text_ok` after
 *   trimming, when the loop's stop reason is not `'natural'`, or when the
 *   echo tool was not executed exactly once.
 */
async function runRuntimeSmoke(projectDir) {
  const toFileHref = (relativePath) => pathToFileURL(join(ROOT_DIR, relativePath)).href;
  const runtimeHref = toFileHref('packages/cli/dist/context/llm/codex-runtime.js');
  const zodHref = toFileHref('packages/cli/node_modules/zod/index.js');
  const { CodexKtxLlmRuntime } = await import(runtimeHref);
  const { z } = await import(zodHref);

  const codexRuntime = new CodexKtxLlmRuntime({
    projectDir,
    modelSlots: { default: 'gpt-5.3-codex' },
  });

  // Step 1: plain text generation must echo the requested marker.
  const reply = await codexRuntime.generateText({
    role: 'default',
    prompt: 'Reply with exactly: ktx_codex_text_ok',
  });
  if (reply.trim() !== 'ktx_codex_text_ok') {
    throw new Error(`Codex text smoke returned unexpected text: ${reply}`);
  }

  // Step 2: agent loop with one tool; count how often the tool actually executes.
  let toolCalls = 0;
  const echoTool = {
    name: 'echo_value',
    description: 'Return the provided value as markdown.',
    inputSchema: z.object({ value: z.string() }),
    execute: async (payload) => {
      toolCalls += 1;
      return { markdown: `echo:${payload.value}` };
    },
  };
  const outcome = await codexRuntime.runAgentLoop({
    modelRole: 'default',
    systemPrompt: 'You must use available tools when the user asks for a tool result.',
    userPrompt:
      'Call the echo_value tool with {"value":"ktx_codex_tool_ok"}, then finish after the tool returns.',
    toolSet: { echo_value: echoTool },
    stepBudget: 4,
    telemetryTags: {},
  });

  if (outcome.stopReason !== 'natural') {
    const reason = outcome.error?.message ?? 'no error';
    throw new Error(`Codex tool smoke stopped with ${outcome.stopReason}: ${reason}`);
  }
  if (toolCalls !== 1) {
    throw new Error(`Expected Codex to call echo_value exactly once, got ${toolCalls}`);
  }
}

/**
 * Run the full Codex backend live smoke: build the package, run the setup
 * smoke, then the runtime smoke, all against a fresh temporary project directory.
 *
 * The temporary directory is removed afterwards whether the smoke passes or fails.
 *
 * @returns {Promise<void>}
 * @throws {Error} When the build or either smoke step fails.
 */
export async function runCodexBackendLiveSmoke() {
  const scratchDir = await mkdtemp(join(tmpdir(), 'ktx-codex-backend-smoke-'));
  try {
    const buildResult = await run('pnpm', ['--filter', '@kaelio/ktx', 'run', 'build'], {
      timeoutMs: 600_000,
    });
    requireSuccess('ktx build', buildResult);

    await runSetupSmoke(scratchDir);
    await runRuntimeSmoke(scratchDir);

    process.stdout.write(`Codex backend live smoke passed in ${scratchDir}\n`);
  } finally {
    // force: true keeps cleanup from failing if the directory is already gone.
    await rm(scratchDir, { recursive: true, force: true });
  }
}

/**
 * Script entry point: run the live smoke when opted in, otherwise print the
 * opt-in hint and return without doing any work.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const { run: shouldRun, message } = codexBackendSmokeOptIn();
  if (shouldRun) {
    await runCodexBackendLiveSmoke();
    return;
  }
  process.stdout.write(`${message}\n`);
}

// Only run main() when this file is the script Node was invoked with, so that
// importing the module for its exports does not start the smoke.
const invokedScriptUrl = pathToFileURL(process.argv[1] ?? '').href;
if (invokedScriptUrl === import.meta.url) {
  await main();
}
feat: add codex llm backend for ktx runtime work (#253) * feat: add codex sdk runner foundation * feat: parse codex runtime events * feat: expose codex runtime mcp tools * feat: add codex llm runtime * feat: wire codex llm backend * test: avoid Array.fromAsync in codex runner test * docs: document codex llm backend * fix: tighten codex runtime config ownership * fix: use codex sdk env and thread options * fix: parse codex sdk event shapes * test: add codex backend live smoke * docs: clarify codex backend isolation * fix: drive codex loop metrics from mcp events * fix: enforce codex local step budget * docs: disclose codex isolation limits * fix: count all codex agent steps and stream step callbacks live The agent-loop step budget only counted completed mcp_tool_call items, so built-in command_execution steps (which the public Codex SDK/CLI surface can still expose) never decremented the budget, letting ingest/reconciliation run past stepBudget until Codex stopped on its own. onStepFinish was also replayed only after the whole stream drained, so live work_unit_step / reconciliation progress appeared stuck until the Codex process exited. collectEvents is now the single live step accumulator: it counts every completed agent-action item via a shared isCompletedAgentStep predicate (command_execution, mcp_tool_call, file_change, web_search), fires onStepFinish as each step completes, and enforces the budget on that broader count. A no-tool turn still counts as one step. toolFailures stays MCP-specific, since a non-zero command exit is normal agent exploration, not a loop failure. * test: align ingest llm-guard assertions with codex backend The skip-llm ingest guard message now lists codex as a valid backend and mentions a Claude Code/Codex session plus a codex setup hint, but this slow suite test still asserted the pre-codex wording. Update it to match the production message (already covered by the local-bundle-runtime unit test) and add the codex setup-line assertion. 
* fix: treat codex error:null tool calls as success The Codex SDK serializes error: null on successful mcp_tool_call items, so the failure check (item.error !== undefined) flagged every successful tool call as failed with the empty-payload default "Codex turn failed". This killed every ingest work unit under the codex backend before it could produce a patch. Key on status === 'failed' (authoritative, always set) and only treat a populated error object as a failure. Add a regression test built from a verbatim real-SDK event capture. * fix: default codex backend to gpt-5.5 and report real probe errors The previous default gpt-5.3-codex is an API-key-only model that the OpenAI API rejects under ChatGPT-account (subscription) auth, so codex status/setup failed with a misleading "authentication is not usable" message even though auth was fine. - Default codex model is now gpt-5.5 (works on both subscription and API-key auth); the curated setup picker offers gpt-5.5 / gpt-5.4 / gpt-5.4-mini and keeps free-form entry for account-specific ids (e.g. gpt-5.3-codex-spark). - runCodexAuthProbe now distinguishes "model not available" from an auth failure and surfaces the real API error: collectEvents retains stream events when the SDK throws on a non-zero exit, and the API error JSON envelope is unwrapped to its human-readable message. - The Codex isolation warning now renders inside the clack setup frame. - Docs updated to gpt-5.5 with a note that -codex ids require API-key auth. fix: require llm.models.default in status and match codex probe remediation Status reported a project ready when a non-none LLM backend was configured without llm.models.default, but the runtime (resolveModelSlots) hard-requires it, so ingest/scan/memory threw after `ktx status` said the project was usable. buildLlmStatus now fails for any non-none backend missing models.default and no longer invents a fallback model for claude-code/codex. 
Codex probe failures now carry a category-matched fix: a model-access failure steers the user at llm.models.default instead of the auth/install remediation. runCodexAuthProbe returns the fix and status consumes it; the message stays self-sufficient so setup output is unchanged. Docs: README now lists the codex backend and local Codex auth; ktx-setup.mdx states --llm-model only accepts codex/default or gpt-/codex- ids. Repaired four doctor fixtures that configured a backend without models.default (the now-correctly-blocked config) and added coverage for the new behavior. 2026-06-02 13:57:11 +02:00			`import { execFile } from 'node:child_process';`
			`import { mkdtemp, rm } from 'node:fs/promises';`
			`import { tmpdir } from 'node:os';`
			`import { dirname, join, resolve } from 'node:path';`
			`import { fileURLToPath, pathToFileURL } from 'node:url';`
			`import { promisify } from 'node:util';`

			`const execFileAsync = promisify(execFile);`
			`const SCRIPT_DIR = dirname(fileURLToPath(import.meta.url));`
			`const ROOT_DIR = resolve(SCRIPT_DIR, '..');`
			`const OPT_IN_MESSAGE =`
			`'Set KTX_RUN_CODEX_BACKEND_SMOKE=1 or pass --force to run the Codex backend live smoke.';`

			`export function codexBackendSmokeOptIn(env = process.env, args = process.argv.slice(2)) {`
			`if (env.KTX_RUN_CODEX_BACKEND_SMOKE === '1' || args.includes('--force')) {`
			`return { run: true };`
			`}`
			`return { run: false, message: OPT_IN_MESSAGE };`
			`}`

			`async function run(command, args, options = {}) {`
			process.stdout.write(`$ ${command} ${args.join(' ')}\n`);
			`try {`
			`const result = await execFileAsync(command, args, {`
			`cwd: options.cwd ?? ROOT_DIR,`
			`env: { ...process.env, ...(options.env ?? {}) },`
			`encoding: 'utf8',`
			`maxBuffer: 1024 * 1024 * 20,`
			`timeout: options.timeoutMs ?? 300_000,`
			`});`
			`if (result.stdout) {`
			`process.stdout.write(result.stdout);`
			`}`
			`if (result.stderr) {`
			`process.stderr.write(result.stderr);`
			`}`
			`return { code: 0, stdout: result.stdout, stderr: result.stderr };`
			`} catch (error) {`
			`const stdout = typeof error.stdout === 'string' ? error.stdout : '';`
			`const stderr = typeof error.stderr === 'string' ? error.stderr : error.message;`
			`if (stdout) {`
			`process.stdout.write(stdout);`
			`}`
			`if (stderr) {`
			`process.stderr.write(stderr);`
			`}`
			`return {`
			`code: typeof error.code === 'number' ? error.code : 1,`
			`stdout,`
			`stderr,`
			`};`
			`}`
			`}`

			`function requireSuccess(label, result) {`
			`if (result.code !== 0) {`
			throw new Error(`${label} failed with code ${result.code}\nstdout:\n${result.stdout}\nstderr:\n${result.stderr}`);
			`}`
			`}`

			`async function runSetupSmoke(projectDir) {`
			`const result = await run(`
			`'node',`
			`[`
			`join(ROOT_DIR, 'packages/cli/dist/bin.js'),`
			`'setup',`
			`'--project-dir',`
			`projectDir,`
			`'--llm-backend',`
			`'codex',`
			`'--llm-model',`
			`'gpt-5.3-codex',`
			`'--no-input',`
			`'--yes',`
			`'--skip-databases',`
			`'--skip-sources',`
			`'--skip-agents',`
			`],`
			`{ timeoutMs: 600_000 },`
			`);`
			`requireSuccess('ktx setup codex backend', result);`
			`if (!result.stdout.includes('LLM ready: yes (codex, gpt-5.3-codex)')) {`
			throw new Error(`setup did not report Codex LLM readiness\nstdout:\n${result.stdout}`);
			`}`
			`}`

			`async function runRuntimeSmoke(projectDir) {`
			`const runtimeUrl = pathToFileURL(join(ROOT_DIR, 'packages/cli/dist/context/llm/codex-runtime.js')).href;`
			`const zodUrl = pathToFileURL(join(ROOT_DIR, 'packages/cli/node_modules/zod/index.js')).href;`
			`const { CodexKtxLlmRuntime } = await import(runtimeUrl);`
			`const { z } = await import(zodUrl);`
			`const runtime = new CodexKtxLlmRuntime({`
			`projectDir,`
			`modelSlots: { default: 'gpt-5.3-codex' },`
			`});`

			`const text = await runtime.generateText({`
			`role: 'default',`
			`prompt: 'Reply with exactly: ktx_codex_text_ok',`
			`});`
			`if (text.trim() !== 'ktx_codex_text_ok') {`
			throw new Error(`Codex text smoke returned unexpected text: ${text}`);
			`}`

			`let toolCalls = 0;`
			`const loop = await runtime.runAgentLoop({`
			`modelRole: 'default',`
			`systemPrompt: 'You must use available tools when the user asks for a tool result.',`
			`userPrompt:`
			`'Call the echo_value tool with {"value":"ktx_codex_tool_ok"}, then finish after the tool returns.',`
			`toolSet: {`
			`echo_value: {`
			`name: 'echo_value',`
			`description: 'Return the provided value as markdown.',`
			`inputSchema: z.object({ value: z.string() }),`
			`execute: async (input) => {`
			`toolCalls += 1;`
			return { markdown: `echo:${input.value}` };
			`},`
			`},`
			`},`
			`stepBudget: 4,`
			`telemetryTags: {},`
			`});`

			`if (loop.stopReason !== 'natural') {`
			throw new Error(`Codex tool smoke stopped with ${loop.stopReason}: ${loop.error?.message ?? 'no error'}`);
			`}`
			`if (toolCalls !== 1) {`
			throw new Error(`Expected Codex to call echo_value exactly once, got ${toolCalls}`);
			`}`
			`}`

			`export async function runCodexBackendLiveSmoke() {`
			`const projectDir = await mkdtemp(join(tmpdir(), 'ktx-codex-backend-smoke-'));`
			`try {`
			`requireSuccess(`
			`'ktx build',`
			`await run('pnpm', ['--filter', '@kaelio/ktx', 'run', 'build'], { timeoutMs: 600_000 }),`
			`);`
			`await runSetupSmoke(projectDir);`
			`await runRuntimeSmoke(projectDir);`
			process.stdout.write(`Codex backend live smoke passed in ${projectDir}\n`);
			`} finally {`
			`await rm(projectDir, { recursive: true, force: true });`
			`}`
			`}`

			`async function main() {`
			`const optIn = codexBackendSmokeOptIn();`
			`if (!optIn.run) {`
			process.stdout.write(`${optIn.message}\n`);
			`return;`
			`}`
			`await runCodexBackendLiveSmoke();`
			`}`

			`if (import.meta.url === pathToFileURL(process.argv[1] ?? '').href) {`
			`await main();`
			`}`