mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-07 07:55:13 +02:00
277 lines
9.1 KiB
TypeScript
277 lines
9.1 KiB
TypeScript
import { describe, expect, it, vi } from 'vitest';
|
|
import { z } from 'zod';
|
|
import {
|
|
CodexKtxLlmRuntime,
|
|
runCodexAuthProbe,
|
|
} from '../../../src/context/llm/codex-runtime.js';
|
|
|
|
/**
 * Turns a fixed list of items into an async stream that yields each item in
 * its original order and then finishes.
 *
 * @param items - Values to replay, in order.
 */
async function* events(items: unknown[]) {
  // Delegate straight to the array's own iterator.
  yield* items;
}
|
|
|
|
/**
 * Creates a stub runner for the runtime under test.
 *
 * The returned object exposes a single `runStreamed` mock; every call resolves
 * to a new async stream (built by `events`) that replays `items` in order.
 *
 * @param items - Scripted stream events to replay on each call.
 */
function runner(items: unknown[]) {
  const runStreamed = vi.fn(async () => events(items));
  return { runStreamed };
}
|
|
|
|
/**
 * Creates a stub runner that scripts two consecutive MCP tool calls ("first"
 * then "second") inside a single turn and remembers the abort signal it was
 * handed.
 *
 * - `runStreamed` records `input.signal` from the most recent call and resolves
 *   to a freshly built event stream.
 * - `observedSignal()` returns that recorded signal, or `undefined` if
 *   `runStreamed` has not been called (or was called without a signal).
 */
function budgetRunner() {
  let capturedSignal: AbortSignal | undefined;

  // Started/completed event pair for one MCP tool call on the "ktx" server.
  const toolCallPair = (tool: string) => [
    { type: 'item.started', item: { type: 'mcp_tool_call', server: 'ktx', tool, status: 'in_progress' } },
    { type: 'item.completed', item: { type: 'mcp_tool_call', server: 'ktx', tool, status: 'completed' } },
  ];

  const runStreamed = vi.fn(async (input: { signal?: AbortSignal }) => {
    capturedSignal = input.signal;
    return events([
      { type: 'turn.started' },
      ...toolCallPair('first'),
      ...toolCallPair('second'),
      { type: 'turn.completed', usage: { input_tokens: 1, output_tokens: 1 } },
    ]);
  });

  return {
    observedSignal: () => capturedSignal,
    runStreamed,
  };
}
|
|
|
|
/**
 * Test suite for `CodexKtxLlmRuntime` and `runCodexAuthProbe`.
 *
 * Every test drives the runtime with a stub runner that replays a scripted
 * list of stream events, then asserts on the returned value and on the
 * arguments the runner (and, where relevant, the MCP server factory) received.
 */
describe('CodexKtxLlmRuntime', () => {
  // Small builders for the scripted stream events shared by several tests.
  const turnStarted = () => ({ type: 'turn.started' });
  const agentMessage = (text: string) => ({ type: 'item.completed', item: { type: 'agent_message', text } });
  // The `usage` key is only present when usage is supplied, so the event shape
  // matches a hand-written literal exactly.
  const turnCompleted = (usage?: Record<string, number>) =>
    usage === undefined ? { type: 'turn.completed' } : { type: 'turn.completed', usage };

  // Schema shared by the structured-output tests.
  const answerSchema = () => z.object({ answer: z.string() });

  // Text generation: the "triage" role must select the model configured for
  // that slot, the system and user prompts are expected joined by a blank
  // line, and the reported usage is expected in camelCase.
  it('generates text with the role-selected model and metrics', async () => {
    const metricsSpy = vi.fn();
    const stubRunner = runner([
      turnStarted(),
      agentMessage('hello'),
      turnCompleted({ input_tokens: 3, output_tokens: 4, total_tokens: 7 }),
    ]);
    const codex = new CodexKtxLlmRuntime({
      projectDir: '/tmp/project',
      modelSlots: { default: 'codex', triage: 'gpt-5.4' },
      runner: stubRunner,
    });

    const pendingText = codex.generateText({
      role: 'triage',
      system: 'system',
      prompt: 'prompt',
      onMetrics: metricsSpy,
    });

    await expect(pendingText).resolves.toBe('hello');

    const expectedCall = expect.objectContaining({
      projectDir: '/tmp/project',
      model: 'gpt-5.4',
      prompt: 'system\n\nprompt',
    });
    expect(stubRunner.runStreamed).toHaveBeenCalledWith(expectedCall);

    const expectedMetrics = expect.objectContaining({
      usage: { inputTokens: 3, outputTokens: 4, totalTokens: 7 },
    });
    expect(metricsSpy).toHaveBeenCalledWith(expectedMetrics);
  });

  // Structured output: the final agent message is JSON that satisfies the
  // schema, and the runner must have been given an object-typed output schema.
  it('generates and validates structured output', async () => {
    const stubRunner = runner([turnStarted(), agentMessage('{"answer":"yes"}'), turnCompleted()]);
    const codex = new CodexKtxLlmRuntime({
      projectDir: '/tmp/project',
      modelSlots: { default: 'codex' },
      runner: stubRunner,
    });

    const pendingObject = codex.generateObject({
      role: 'default',
      prompt: 'json',
      schema: answerSchema(),
    });

    await expect(pendingObject).resolves.toEqual({ answer: 'yes' });

    const objectSchema = expect.objectContaining({ type: 'object' });
    expect(stubRunner.runStreamed).toHaveBeenCalledWith(expect.objectContaining({ outputSchema: objectSchema }));
  });

  // Structured output: a final message that is not JSON must reject with the
  // validation error.
  it('returns a structured-output error when Codex final text is invalid JSON', async () => {
    const stubRunner = runner([turnStarted(), agentMessage('not json'), turnCompleted()]);
    const codex = new CodexKtxLlmRuntime({
      projectDir: '/tmp/project',
      modelSlots: { default: 'codex' },
      runner: stubRunner,
    });

    const pendingObject = codex.generateObject({
      role: 'default',
      prompt: 'json',
      schema: answerSchema(),
    });

    await expect(pendingObject).rejects.toThrow('Codex structured output failed validation');
  });

  // Agent loop with tools: the injected MCP server factory is called, its URL
  // and bearer token are forwarded to the runner, and the server is closed.
  it('starts and closes a temporary MCP server for tool-backed agent loops', async () => {
    const mcpUrl = 'http://127.0.0.1:4321/mcp';
    const closeServer = vi.fn(async () => undefined);
    const launchMcpServer = vi.fn(async () => ({
      url: mcpUrl,
      bearerTokenEnvVar: 'KTX_CODEX_RUNTIME_MCP_TOKEN' as const,
      bearerToken: 'token',
      close: closeServer,
    }));
    const stubRunner = runner([
      turnStarted(),
      { type: 'item.started', item: { type: 'mcp_tool_call', name: 'wiki_search' } },
      agentMessage('done'),
      turnCompleted({ input_tokens: 1, output_tokens: 1, total_tokens: 2 }),
    ]);
    const codex = new CodexKtxLlmRuntime({
      projectDir: '/tmp/project',
      modelSlots: { default: 'codex' },
      runner: stubRunner,
      startMcpServer: launchMcpServer,
    });
    const stepSpy = vi.fn();

    const outcome = await codex.runAgentLoop({
      modelRole: 'default',
      systemPrompt: 'system',
      userPrompt: 'user',
      stepBudget: 5,
      telemetryTags: {},
      onStepFinish: stepSpy,
      toolSet: {
        // The key deliberately differs from the tool's `name`; the enabled
        // tool list asserted below uses the `name`.
        aliased_wiki_tool: {
          name: 'wiki_search',
          description: 'Search wiki',
          inputSchema: z.object({ query: z.string() }),
          execute: vi.fn(),
        },
      },
    });

    expect(outcome.stopReason).toBe('natural');
    expect(outcome.metrics).toMatchObject({
      stepCount: 1,
      usage: { inputTokens: 1, outputTokens: 1, totalTokens: 2 },
    });
    expect(stepSpy).toHaveBeenCalledWith({ stepIndex: 1, stepBudget: 5 });
    expect(launchMcpServer).toHaveBeenCalledWith({ projectDir: '/tmp/project', toolSet: expect.any(Object) });

    // Build the nested matcher from the innermost server entry outwards.
    const ktxServerEntry = expect.objectContaining({
      url: mcpUrl,
      enabled_tools: ['wiki_search'],
      required: true,
    });
    const expectedOverrides = expect.objectContaining({
      mcp_servers: expect.objectContaining({ ktx: ktxServerEntry }),
    });
    expect(stubRunner.runStreamed).toHaveBeenCalledWith(
      expect.objectContaining({
        env: { KTX_CODEX_RUNTIME_MCP_TOKEN: 'token' },
        configOverrides: expectedOverrides,
      }),
    );
    expect(closeServer).toHaveBeenCalled();
  });

  // Agent loop: a failed turn is reported through the result, carrying the
  // failure message from the event.
  it('returns error stop reason on turn failure', async () => {
    const failingRunner = runner([{ type: 'turn.failed', error: { message: 'boom' } }]);
    const codex = new CodexKtxLlmRuntime({
      projectDir: '/tmp/project',
      modelSlots: { default: 'codex' },
      runner: failingRunner,
    });

    const outcome = await codex.runAgentLoop({
      modelRole: 'default',
      systemPrompt: 'system',
      userPrompt: 'user',
      stepBudget: 5,
      telemetryTags: {},
      toolSet: {},
    });

    expect(outcome.stopReason).toBe('error');
    expect(outcome.error?.message).toBe('boom');
  });

  // Agent loop: a tool call that completes with status "failed" ends the loop
  // with an error, while step count and usage are still reported.
  it('surfaces failed MCP tool calls as agent-loop errors', async () => {
    const searchCall = { type: 'mcp_tool_call', server: 'ktx', tool: 'search' };
    const scriptedEvents = [
      turnStarted(),
      { type: 'item.started', item: { ...searchCall, status: 'in_progress' } },
      { type: 'item.completed', item: { ...searchCall, status: 'failed', error: { message: 'denied' } } },
      turnCompleted({ input_tokens: 1, output_tokens: 1 }),
    ];
    const codex = new CodexKtxLlmRuntime({
      projectDir: '/tmp/project',
      modelSlots: { default: 'codex' },
      runner: runner(scriptedEvents),
    });

    const outcome = await codex.runAgentLoop({
      modelRole: 'default',
      systemPrompt: 'system',
      userPrompt: 'user',
      stepBudget: 5,
      telemetryTags: {},
      toolSet: {},
    });

    expect(outcome.stopReason).toBe('error');
    expect(outcome.error?.message).toBe('Codex runtime tool call failed: search: denied');
    expect(outcome.metrics).toMatchObject({
      stepCount: 1,
      usage: { inputTokens: 1, outputTokens: 1, totalTokens: 2 },
    });
  });

  // Agent loop: with a budget of one step and two scripted tool calls, the
  // loop must stop with "budget", report exactly one step, and abort the
  // signal that was passed to the runner.
  it('returns budget and aborts the Codex stream when local MCP step budget is reached', async () => {
    const signalRunner = budgetRunner();
    const codex = new CodexKtxLlmRuntime({
      projectDir: '/tmp/project',
      modelSlots: { default: 'codex' },
      runner: signalRunner,
    });
    const stepSpy = vi.fn();

    const outcome = await codex.runAgentLoop({
      modelRole: 'default',
      systemPrompt: 'system',
      userPrompt: 'user',
      stepBudget: 1,
      telemetryTags: {},
      onStepFinish: stepSpy,
      toolSet: {
        first: {
          name: 'first',
          description: 'First tool',
          inputSchema: z.object({}),
          execute: vi.fn(),
        },
      },
    });

    expect(outcome.stopReason).toBe('budget');
    expect(outcome.error).toBeUndefined();
    expect(outcome.metrics).toMatchObject({ stepCount: 1 });
    expect(stepSpy).toHaveBeenCalledTimes(1);
    expect(stepSpy).toHaveBeenCalledWith({ stepIndex: 1, stepBudget: 1 });

    const signal = signalRunner.observedSignal();
    expect(signal?.aborted).toBe(true);
  });

  // Auth probe: a turn that completes with an agent message counts as a
  // successful probe.
  it('probes Codex authentication through a minimal non-interactive turn', async () => {
    const stubRunner = runner([turnStarted(), agentMessage('ok'), turnCompleted()]);

    const pendingProbe = runCodexAuthProbe({
      projectDir: '/tmp/project',
      model: 'codex',
      runner: stubRunner,
    });

    await expect(pendingProbe).resolves.toEqual({ ok: true });
  });
});
|