mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-10 08:05:14 +02:00
* docs: revise claude-code ingest backend spec * docs: keep claude-code spec focused on ingest * docs: expand claude-code spec to full llm parity * Refine claude-code backend spec after adversarial review iteration 1 * Refine claude-code backend spec after adversarial review iteration 2 * Refine claude-code backend spec after adversarial review iteration 3 * feat: recognize claude-code llm backend * feat: add ktx llm runtime port * feat: add claude-code llm runtime * feat: route non-agent llm calls through runtime * feat: run ingest agents through llm runtime * feat: support claude-code setup and status * test: verify claude-code backend runtime * docs: add claude-code backend v1 runtime plan * fix: close claude-code runtime isolation checks * fix: warn on claude-code prompt caching during setup * chore: verify claude-code v1 closure * docs: add claude-code backend v1 isolation closure plan * fix: update claude-code ingest setup guidance * docs: add claude-code backend v1 ingest guidance closure plan * docs: align claude-code isolation spec with sdk metadata * test: cover claude-code host discovery metadata * fix: tolerate claude-code host discovery metadata * docs: clarify claude-code host discovery metadata * docs: add claude-code auth-probe isolation fix plan * chore: prepare kaelio ktx rc1 release * chore: add semantic release workflow * fix: unblock ci checks * chore(release): 0.1.0-rc.1 * feat: add Claude Code model selection to setup * fix: keep git maintenance attached in local repos
336 lines
11 KiB
TypeScript
336 lines
11 KiB
TypeScript
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
|
|
|
vi.mock('ai', () => ({
|
|
generateText: vi.fn(),
|
|
stepCountIs: (n: number) => n,
|
|
tool: (def: unknown) => def,
|
|
}));
|
|
|
|
import { generateText } from 'ai';
|
|
import { AgentRunnerService, type RunLoopStepInfo } from './agent-runner.service.js';
|
|
|
|
describe('AgentRunnerService.runLoop', () => {
|
|
let runner: AgentRunnerService;
|
|
const llmProvider = {
|
|
getModel: vi.fn().mockReturnValue({ modelId: 'claude-sonnet-4-6', provider: 'anthropic' }),
|
|
getModelByName: vi.fn(),
|
|
cacheMarker: vi.fn(),
|
|
repairToolCallHandler: vi.fn(),
|
|
thinkingProviderOptions: vi.fn(),
|
|
telemetryConfig: vi.fn(),
|
|
promptCachingConfig: vi.fn(() => ({
|
|
enabled: false,
|
|
systemTtl: '1h',
|
|
toolsTtl: '1h',
|
|
historyTtl: '5m',
|
|
cacheSystem: true,
|
|
cacheTools: true,
|
|
cacheHistory: true,
|
|
vertexFallbackTo5m: false,
|
|
})),
|
|
activeBackend: vi.fn(() => 'anthropic'),
|
|
};
|
|
|
|
beforeEach(() => {
|
|
vi.clearAllMocks();
|
|
runner = new AgentRunnerService({ llmProvider: llmProvider as any });
|
|
});
|
|
|
|
afterEach(() => vi.clearAllMocks());
|
|
|
|
it('passes systemPrompt, userPrompt, tools, and step budget through to generateText', async () => {
|
|
(generateText as any).mockResolvedValue({ text: 'ok', toolCalls: [], steps: [] });
|
|
const repairHandler = vi.fn();
|
|
llmProvider.repairToolCallHandler.mockReturnValueOnce(repairHandler);
|
|
const tools = { noop: { description: 'noop', inputSchema: {}, execute: vi.fn() } };
|
|
await runner.runLoop({
|
|
modelRole: 'candidateExtraction',
|
|
systemPrompt: 'SYS',
|
|
userPrompt: 'USR',
|
|
toolSet: tools as any,
|
|
stepBudget: 17,
|
|
telemetryTags: { source: 'test' },
|
|
});
|
|
const call = (generateText as any).mock.calls[0][0];
|
|
expect(call.system).toEqual({ role: 'system', content: 'SYS' });
|
|
expect(call.messages).toEqual([{ role: 'user', content: 'USR' }]);
|
|
expect(call.prompt).toBeUndefined();
|
|
expect(call.tools.noop).toEqual(
|
|
expect.objectContaining({
|
|
description: 'noop',
|
|
inputSchema: {},
|
|
execute: expect.any(Function),
|
|
toModelOutput: expect.any(Function),
|
|
}),
|
|
);
|
|
expect(call.stopWhen).toBe(17);
|
|
expect(call.temperature).toBe(0);
|
|
expect(call.experimental_repairToolCall).toBe(repairHandler);
|
|
expect(llmProvider.getModel).toHaveBeenCalledWith('candidateExtraction');
|
|
expect(llmProvider.repairToolCallHandler).toHaveBeenCalledWith({ source: 'ktx-agent-runner' });
|
|
});
|
|
|
|
it('returns stopReason=natural when the loop completes without error', async () => {
|
|
(generateText as any).mockResolvedValue({ text: 'done', toolCalls: [], steps: [] });
|
|
const result = await runner.runLoop({
|
|
modelRole: 'candidateExtraction',
|
|
systemPrompt: 'system',
|
|
userPrompt: 'user',
|
|
toolSet: {},
|
|
stepBudget: 10,
|
|
telemetryTags: {},
|
|
});
|
|
expect(result.stopReason).toBe('natural');
|
|
expect(result.error).toBeUndefined();
|
|
expect(llmProvider.getModel).toHaveBeenCalledWith('candidateExtraction');
|
|
expect(generateText).toHaveBeenCalledWith(
|
|
expect.objectContaining({
|
|
system: { role: 'system', content: 'system' },
|
|
messages: [{ role: 'user', content: 'user' }],
|
|
}),
|
|
);
|
|
});
|
|
|
|
it('returns stopReason=error with the error on generateText failure', async () => {
|
|
const err = new Error('LLM unavailable');
|
|
(generateText as any).mockRejectedValue(err);
|
|
const result = await runner.runLoop({
|
|
modelRole: 'candidateExtraction',
|
|
systemPrompt: '',
|
|
userPrompt: '',
|
|
toolSet: {},
|
|
stepBudget: 10,
|
|
telemetryTags: {},
|
|
});
|
|
expect(result.stopReason).toBe('error');
|
|
expect(result.error).toBe(err);
|
|
});
|
|
|
|
it('invokes caller onStepFinish with incrementing stepIndex and total budget', async () => {
|
|
const calls: RunLoopStepInfo[] = [];
|
|
(generateText as any).mockImplementation(async (opts: any) => {
|
|
for (let i = 0; i < 3; i++) {
|
|
await opts.onStepFinish({});
|
|
}
|
|
return { text: 'ok', toolCalls: [], steps: [] };
|
|
});
|
|
|
|
await runner.runLoop({
|
|
modelRole: 'candidateExtraction',
|
|
systemPrompt: '',
|
|
userPrompt: '',
|
|
toolSet: {},
|
|
stepBudget: 10,
|
|
telemetryTags: {},
|
|
onStepFinish: (info) => {
|
|
calls.push(info);
|
|
},
|
|
});
|
|
|
|
expect(calls).toEqual([
|
|
{ stepIndex: 1, stepBudget: 10 },
|
|
{ stepIndex: 2, stepBudget: 10 },
|
|
{ stepIndex: 3, stepBudget: 10 },
|
|
]);
|
|
});
|
|
|
|
it('swallows errors thrown from caller onStepFinish without aborting the loop', async () => {
|
|
(generateText as any).mockImplementation(async (opts: any) => {
|
|
await opts.onStepFinish({});
|
|
return { text: 'ok', toolCalls: [], steps: [] };
|
|
});
|
|
|
|
const result = await runner.runLoop({
|
|
modelRole: 'candidateExtraction',
|
|
systemPrompt: '',
|
|
userPrompt: '',
|
|
toolSet: {},
|
|
stepBudget: 10,
|
|
telemetryTags: {},
|
|
onStepFinish: () => {
|
|
throw new Error('boom');
|
|
},
|
|
});
|
|
|
|
expect(result.stopReason).toBe('natural');
|
|
});
|
|
|
|
it('forwards telemetryTags.source through experimental_telemetry metadata', async () => {
|
|
(generateText as any).mockResolvedValue({ text: 'ok', toolCalls: [], steps: [] });
|
|
const telemetryConfigEnabled = {
|
|
isEnabled: () => true,
|
|
devtoolsEnabled: false,
|
|
appSettingsService: {
|
|
settings: { telemetry: { recordInputs: false, recordOutputs: false } },
|
|
},
|
|
systemConfigService: {
|
|
config: { instance: { name: 'test-instance' } },
|
|
},
|
|
} as any;
|
|
const runnerWithTelemetry = new AgentRunnerService({
|
|
llmProvider: llmProvider as any,
|
|
telemetry: {
|
|
createTelemetry: (tags) => ({
|
|
isEnabled: telemetryConfigEnabled.isEnabled(),
|
|
metadata: {
|
|
source: tags.source ?? 'RESEARCH',
|
|
jobId: tags.jobId,
|
|
unitKey: tags.unitKey,
|
|
},
|
|
}),
|
|
},
|
|
});
|
|
await runnerWithTelemetry.runLoop({
|
|
modelRole: 'candidateExtraction',
|
|
systemPrompt: '',
|
|
userPrompt: '',
|
|
toolSet: {},
|
|
stepBudget: 10,
|
|
telemetryTags: { source: 'metabase', jobId: 'job-123', unitKey: 'u/1' },
|
|
});
|
|
const call = (generateText as any).mock.calls[0][0];
|
|
expect(call.experimental_telemetry.metadata.source).toBe('metabase');
|
|
});
|
|
|
|
it('defaults to source=RESEARCH when telemetryTags omits source', async () => {
|
|
(generateText as any).mockResolvedValue({ text: 'ok', toolCalls: [], steps: [] });
|
|
const telemetryConfigEnabled = {
|
|
isEnabled: () => true,
|
|
devtoolsEnabled: false,
|
|
appSettingsService: {
|
|
settings: { telemetry: { recordInputs: false, recordOutputs: false } },
|
|
},
|
|
systemConfigService: {
|
|
config: { instance: { name: 'test-instance' } },
|
|
},
|
|
} as any;
|
|
const runnerWithTelemetry = new AgentRunnerService({
|
|
llmProvider: llmProvider as any,
|
|
telemetry: {
|
|
createTelemetry: (tags) => ({
|
|
isEnabled: telemetryConfigEnabled.isEnabled(),
|
|
metadata: {
|
|
source: tags.source ?? 'RESEARCH',
|
|
jobId: tags.jobId,
|
|
unitKey: tags.unitKey,
|
|
},
|
|
}),
|
|
},
|
|
});
|
|
await runnerWithTelemetry.runLoop({
|
|
modelRole: 'candidateExtraction',
|
|
systemPrompt: '',
|
|
userPrompt: '',
|
|
toolSet: {},
|
|
stepBudget: 10,
|
|
telemetryTags: { operationName: 'memory-agent-ingest' },
|
|
});
|
|
const call = (generateText as any).mock.calls[0][0];
|
|
expect(call.experimental_telemetry.metadata.source).toBe('RESEARCH');
|
|
});
|
|
|
|
it('forwards jobId and unitKey through experimental_telemetry metadata', async () => {
|
|
(generateText as any).mockResolvedValue({ text: 'ok', toolCalls: [], steps: [] });
|
|
const telemetryConfigEnabled = {
|
|
isEnabled: () => true,
|
|
devtoolsEnabled: false,
|
|
appSettingsService: {
|
|
settings: { telemetry: { recordInputs: false, recordOutputs: false } },
|
|
},
|
|
systemConfigService: {
|
|
config: { instance: { name: 'test-instance' } },
|
|
},
|
|
} as any;
|
|
const runnerWithTelemetry = new AgentRunnerService({
|
|
llmProvider: llmProvider as any,
|
|
telemetry: {
|
|
createTelemetry: (tags) => ({
|
|
isEnabled: telemetryConfigEnabled.isEnabled(),
|
|
metadata: {
|
|
source: tags.source ?? 'RESEARCH',
|
|
jobId: tags.jobId,
|
|
unitKey: tags.unitKey,
|
|
},
|
|
}),
|
|
},
|
|
});
|
|
await runnerWithTelemetry.runLoop({
|
|
modelRole: 'candidateExtraction',
|
|
systemPrompt: '',
|
|
userPrompt: '',
|
|
toolSet: {},
|
|
stepBudget: 10,
|
|
telemetryTags: { source: 'metabase', jobId: 'job-777', unitKey: 'sources/users' },
|
|
});
|
|
const call = (generateText as any).mock.calls[0][0];
|
|
expect(call.experimental_telemetry.metadata.jobId).toBe('job-777');
|
|
expect(call.experimental_telemetry.metadata.unitKey).toBe('sources/users');
|
|
});
|
|
|
|
it('records a sanitized LLM debug request when a recorder is injected', async () => {
|
|
(generateText as any).mockResolvedValue({ text: 'ok', toolCalls: [], steps: [] });
|
|
const record = vi.fn();
|
|
const provider = {
|
|
...llmProvider,
|
|
cacheMarker: vi.fn((ttl: '5m' | '1h') => ({
|
|
anthropic: { cacheControl: { type: 'ephemeral' as const, ttl } },
|
|
})),
|
|
promptCachingConfig: vi.fn(() => ({
|
|
enabled: true,
|
|
systemTtl: '1h',
|
|
toolsTtl: '1h',
|
|
historyTtl: '5m',
|
|
cacheSystem: true,
|
|
cacheTools: true,
|
|
cacheHistory: true,
|
|
vertexFallbackTo5m: false,
|
|
})),
|
|
};
|
|
const runnerWithDebug = new AgentRunnerService({
|
|
llmProvider: provider as any,
|
|
debugRequestRecorder: { record },
|
|
});
|
|
|
|
await runnerWithDebug.runLoop({
|
|
modelRole: 'candidateExtraction',
|
|
systemPrompt: 'SECRET SYSTEM PROMPT',
|
|
userPrompt: 'SECRET USER PROMPT',
|
|
toolSet: {
|
|
emit_candidate: {
|
|
description: 'SECRET TOOL DESCRIPTION',
|
|
inputSchema: {},
|
|
execute: vi.fn(),
|
|
} as any,
|
|
},
|
|
stepBudget: 10,
|
|
telemetryTags: { operationName: 'ingest-bundle-wu', source: 'metabase', jobId: 'job-1', unitKey: 'cards/1' },
|
|
});
|
|
|
|
expect(record).toHaveBeenCalledTimes(1);
|
|
expect(record).toHaveBeenCalledWith(
|
|
expect.objectContaining({
|
|
operationName: 'ingest-bundle-wu',
|
|
source: 'metabase',
|
|
jobId: 'job-1',
|
|
unitKey: 'cards/1',
|
|
modelRole: 'candidateExtraction',
|
|
modelId: 'claude-sonnet-4-6',
|
|
messageCount: 2,
|
|
toolNames: ['emit_candidate'],
|
|
}),
|
|
);
|
|
const providerOptions = record.mock.calls[0][0].providerOptions;
|
|
expect(providerOptions).toEqual(
|
|
expect.arrayContaining([
|
|
expect.objectContaining({ target: 'message', index: 0, role: 'system' }),
|
|
expect.objectContaining({ target: 'message-part', index: 1, role: 'user', partIndex: 0 }),
|
|
expect.objectContaining({ target: 'tool', name: 'emit_candidate' }),
|
|
]),
|
|
);
|
|
expect(providerOptions).toHaveLength(3);
|
|
const serialized = JSON.stringify(record.mock.calls[0][0]);
|
|
expect(serialized).not.toContain('SECRET SYSTEM PROMPT');
|
|
expect(serialized).not.toContain('SECRET USER PROMPT');
|
|
expect(serialized).not.toContain('SECRET TOOL DESCRIPTION');
|
|
});
|
|
});
|