ktx/packages/cli/test/setup-models.test.ts

1125 lines
40 KiB
TypeScript
Raw Permalink Normal View History

2026-05-10 23:12:26 +02:00
import { mkdir, mkdtemp, readFile, rm, stat, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
test: split cli tests from source tree (#216) * feat(cli): define full warehouse dialect contract * test(cli): keep dialect edge tests focused * fix(cli): stabilize dialect contract foundation * refactor(connectors): own read-only query preparation * refactor(connectors): resolve dialects through registry * refactor(connectors): keep concrete dialect classes internal * chore(workspace): enforce dialect import boundary * refactor(cli): resolve relationship dialect at scan boundary * refactor(cli): use dialect display parsing for entity details * refactor(cli): use dialect display parsing for warehouse catalog * refactor(cli): use dialect SQL in relationship workflows * test(cli): verify solid dialect scan workflow closure * test: split cli tests from source tree * refactor(cli): standardize BigQuery scope listing * feat(sqlite): implement connector scope listing * test(connectors): cover required table listing * feat(cli): add warehouse driver registry * refactor(setup): route scope discovery through driver registry * refactor(cli): route local query execution through driver registry * refactor(historic-sql): route dialect support through driver registry * refactor(cli): test warehouse connections through driver registry * fix(cli): close driver registry type export gaps * Improve setup daemon diagnostics * refactor(setup): centralize rail-prefixed diagnostics + query-history fallback Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput into clack.ts so the setup wizard, managed daemons, and embedding/agent steps share one rail-formatted writer. setup-databases.ts also adds a "disable query history and retry" option when the schema-context build fails and query history is the likely culprit, surfaced via a new failed-query-history-unavailable status. 
* fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match The setup picker's KtxTableListEntry was a 2-level { schema, name }, so qualifiedTableId always wrote db.name into enabled_tables. When BigQuery, Snowflake, or SQL Server later ran fast ingest, their introspect step filtered the scope set with scopedTableNames(scope, { catalog: projectId|database, db }) — catalog was non-null on the introspect side but null in the scope refs, so every entry was rejected, the live-database adapter staged zero table files, and detect() failed with 'Adapter "live-database" did not recognize fetched source output'. Align the picker boundary with the canonical 3-level KtxTableRef: - Add catalog: string | null to KtxTableListEntry. - BigQuery/Snowflake/SQL Server listTables populate catalog from the resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null. - qualifiedTableId emits catalog.schema.name when catalog is non-null (resolveEnabledTables already accepts the 3-part shape) and schemasFromEnabledTables now goes through parseDottedTableEntry so it recovers the schema correctly from both 2-part and 3-part entries. - Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker reuse. Update listTables expectations in all seven connector tests and the setup / picker test fixtures. Add a picker regression test that covers the catalog-bearing round-trip (save + refine). * fix(cli): allow debug telemetry under opt-out env
2026-05-26 08:49:05 +02:00
import { initKtxProject } from '../src/context/project/project.js';
import { parseKtxProjectConfig } from '../src/context/project/config.js';
import { readKtxSetupState, writeKtxSetupState } from '../src/context/project/setup-config.js';
2026-05-10 23:12:26 +02:00
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import {
2026-05-10 23:51:24 +02:00
type KtxSetupModelPromptAdapter,
runKtxSetupAnthropicModelStep,
test: split cli tests from source tree (#216) * feat(cli): define full warehouse dialect contract * test(cli): keep dialect edge tests focused * fix(cli): stabilize dialect contract foundation * refactor(connectors): own read-only query preparation * refactor(connectors): resolve dialects through registry * refactor(connectors): keep concrete dialect classes internal * chore(workspace): enforce dialect import boundary * refactor(cli): resolve relationship dialect at scan boundary * refactor(cli): use dialect display parsing for entity details * refactor(cli): use dialect display parsing for warehouse catalog * refactor(cli): use dialect SQL in relationship workflows * test(cli): verify solid dialect scan workflow closure * test: split cli tests from source tree * refactor(cli): standardize BigQuery scope listing * feat(sqlite): implement connector scope listing * test(connectors): cover required table listing * feat(cli): add warehouse driver registry * refactor(setup): route scope discovery through driver registry * refactor(cli): route local query execution through driver registry * refactor(historic-sql): route dialect support through driver registry * refactor(cli): test warehouse connections through driver registry * fix(cli): close driver registry type export gaps * Improve setup daemon diagnostics * refactor(setup): centralize rail-prefixed diagnostics + query-history fallback Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput into clack.ts so the setup wizard, managed daemons, and embedding/agent steps share one rail-formatted writer. setup-databases.ts also adds a "disable query history and retry" option when the schema-context build fails and query history is the likely culprit, surfaced via a new failed-query-history-unavailable status. 
* fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match The setup picker's KtxTableListEntry was a 2-level { schema, name }, so qualifiedTableId always wrote db.name into enabled_tables. When BigQuery, Snowflake, or SQL Server later ran fast ingest, their introspect step filtered the scope set with scopedTableNames(scope, { catalog: projectId|database, db }) — catalog was non-null on the introspect side but null in the scope refs, so every entry was rejected, the live-database adapter staged zero table files, and detect() failed with 'Adapter "live-database" did not recognize fetched source output'. Align the picker boundary with the canonical 3-level KtxTableRef: - Add catalog: string | null to KtxTableListEntry. - BigQuery/Snowflake/SQL Server listTables populate catalog from the resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null. - qualifiedTableId emits catalog.schema.name when catalog is non-null (resolveEnabledTables already accepts the 3-part shape) and schemasFromEnabledTables now goes through parseDottedTableEntry so it recovers the schema correctly from both 2-part and 3-part entries. - Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker reuse. Update listTables expectations in all seven connector tests and the setup / picker test fixtures. Add a picker regression test that covers the catalog-bearing round-trip (save + refine). * fix(cli): allow debug telemetry under opt-out env
2026-05-26 08:49:05 +02:00
} from '../src/setup-models.js';
2026-05-10 23:12:26 +02:00
/**
 * Builds an in-memory IO double for the setup step.
 *
 * `io.stdout` (flagged as a TTY) and `io.stderr` append every written chunk to a private
 * buffer; the `stdout()` / `stderr()` accessors return everything written so far.
 */
function makeIo() {
  const captured = { out: '', err: '' };
  // One factory for both streams: each writer appends to its own buffer slot.
  const appendTo = (slot: 'out' | 'err') => (chunk: string) => {
    captured[slot] += chunk;
  };
  return {
    io: {
      stdout: { isTTY: true, write: appendTo('out') },
      stderr: { write: appendTo('err') },
    },
    stdout: () => captured.out,
    stderr: () => captured.err,
  };
}
/**
 * Builds a mock spinner factory plus the shared log its spinners write to.
 *
 * Every spinner returned by `spinner()` records each `start` / `message` / `stop` / `error`
 * call as a `"<method>:<msg>"` entry appended to `events`, in call order.
 */
function makeSpinnerEvents() {
  const events: string[] = [];
  // Recorder for one spinner method; returns the result of `push`, like an inline arrow would.
  const record = (kind: string) => (msg: string) => events.push(`${kind}:${msg}`);
  const spinner = vi.fn(() => ({
    start: record('start'),
    message: record('message'),
    stop: record('stop'),
    error: record('error'),
  }));
  return { events, spinner };
}
2026-05-10 23:12:26 +02:00
/**
 * Builds a scripted `KtxSetupModelPromptAdapter` whose prompts answer from queued values.
 *
 * `select` and `autocomplete` share one chooser (and therefore one `selectValues` queue):
 * - Provider prompt (message contains `'LLM provider'`): if the head of `selectValues` is a
 *   provider id (`anthropic`, `vertex`, `claude-code`, `codex`) or `back`, that entry is
 *   consumed and returned. Otherwise, on the second and later provider prompts with
 *   `credentialChoice === 'back'`, it answers `back`. Otherwise it answers `providerChoice`,
 *   defaulting to `anthropic`.
 * - Any other prompt: the next queued `selectValues` entry when it is non-empty; otherwise
 *   `credentialChoice` (default `env`) when the message contains `'Anthropic API key'`, or
 *   `modelChoice` (default `claude-sonnet-4-6`) for everything else.
 *
 * `text` answers from `textValues` and falls back to `''`. `password` answers from
 * `passwordValues` while that queue has entries (an entry may be `undefined`), then falls
 * back to `passwordValue`, then to a fixed placeholder key. `cancel` is a bare mock.
 *
 * @param options Scripted answers; every field is optional.
 * @returns An adapter whose members are `vi.fn` mocks, so calls can be asserted on.
 */
function makePromptAdapter(options: {
  providerChoice?: string;
  selectValues?: string[];
  credentialChoice?: string;
  modelChoice?: string;
  textValues?: string[];
  passwordValue?: string;
  passwordValues?: Array<string | undefined>;
}): KtxSetupModelPromptAdapter {
  // Work on copies so shift() never mutates the arrays the caller passed in.
  const selectValues = [...(options.selectValues ?? [])];
  const textValues = [...(options.textValues ?? [])];
  const passwordValues = [...(options.passwordValues ?? [])];
  let providerPromptCount = 0;

  const choose = async ({ message }: { message: string }) => {
    if (message.includes('LLM provider')) {
      providerPromptCount += 1;
      // Peek first: the head is consumed only when it is an answer to the provider prompt.
      const nextProviderChoice = selectValues[0];
      if (
        nextProviderChoice === 'anthropic' ||
        nextProviderChoice === 'vertex' ||
        nextProviderChoice === 'claude-code' ||
        nextProviderChoice === 'codex' ||
        nextProviderChoice === 'back'
      ) {
        return selectValues.shift() ?? nextProviderChoice;
      }
      // NOTE(review): presumably lets a test that backs out of the credential prompt also
      // leave the provider prompt when it is shown again — confirm against the callers.
      if (options.credentialChoice === 'back' && providerPromptCount > 1) {
        return 'back';
      }
      return options.providerChoice ?? 'anthropic';
    }

    // The head is consumed even when it is an empty string, which is then ignored.
    const nextValue = selectValues.shift();
    if (nextValue) {
      return nextValue;
    }
    if (message.includes('Anthropic API key')) {
      return options.credentialChoice ?? 'env';
    }
    return options.modelChoice ?? 'claude-sonnet-4-6';
  };

  return {
    select: vi.fn(choose),
    autocomplete: vi.fn(choose),
    text: vi.fn(async () => textValues.shift() ?? ''),
    password: vi.fn(
      async () =>
        passwordValues.length > 0 ? passwordValues.shift() : options.passwordValue ?? 'sk-ant-pasted', // pragma: allowlist secret
    ),
    cancel: vi.fn(),
  };
}
// Model id for each of the six model slots, using full Claude model ids.
// NOTE(review): presumably the models expected after setup with the `anthropic` backend —
// it is not referenced in the nearby tests, so confirm against the tests that use it.
const anthropicPreset = {
default: 'claude-sonnet-4-6',
triage: 'claude-haiku-4-5',
candidateExtraction: 'claude-sonnet-4-6',
curator: 'claude-opus-4-7',
reconcile: 'claude-opus-4-7',
repair: 'claude-haiku-4-5',
};
// Short model aliases per slot that the Claude Code tests in this file expect to find in
// `llm.models` of the written `ktx.yaml` after setup selects the `claude-code` backend.
const claudeCodePreset = {
default: 'sonnet',
triage: 'haiku',
candidateExtraction: 'sonnet',
curator: 'opus',
reconcile: 'opus',
repair: 'haiku',
};
// Models the Codex backend test in this file expects in `llm.models`: every slot is `gpt-5.5`.
const codexPreset = {
default: 'gpt-5.5',
triage: 'gpt-5.5',
candidateExtraction: 'gpt-5.5',
curator: 'gpt-5.5',
reconcile: 'gpt-5.5',
repair: 'gpt-5.5',
};
2026-05-10 23:12:26 +02:00
describe('setup Anthropic model step', () => {
let tempDir: string;
// Each test gets its own temp directory, on which `initKtxProject` is run before the test body.
beforeEach(async () => {
  tempDir = await mkdtemp(join(tmpdir(), 'ktx-setup-models-'));
  await initKtxProject({ projectDir: tempDir });
});
// Delete the per-test project directory; the returned promise is what the hook waits on.
afterEach(() => rm(tempDir, { recursive: true, force: true }));
// Answers `back` at the provider prompt, then pins the exact option list (values, labels and
// order) that the provider prompt was called with.
it('offers Anthropic provider paths in the preferred order', async () => {
  const prompts = makePromptAdapter({ providerChoice: 'back' });
  const result = await runKtxSetupAnthropicModelStep(
    { projectDir: tempDir, inputMode: 'auto', skipLlm: false },
    makeIo().io,
    { prompts, env: {} },
  );
  expect(result.status).toBe('back');
  expect(prompts.select).toHaveBeenCalledWith(
    expect.objectContaining({
      message: expect.stringContaining('Which LLM provider should KTX use?'),
      options: [
        { value: 'claude-code', label: 'Claude subscription (Pro/Max)' },
        { value: 'codex', label: 'Codex subscription' },
        { value: 'anthropic', label: 'Anthropic API key' },
        { value: 'vertex', label: 'Google Vertex AI for Anthropic Claude' },
        { value: 'back', label: 'Back' },
      ],
    }),
  );
});
feat: add claude-code llm backend with runtime port (#115) * docs: revise claude-code ingest backend spec * docs: keep claude-code spec focused on ingest * docs: expand claude-code spec to full llm parity * Refine claude-code backend spec after adversarial review iteration 1 * Refine claude-code backend spec after adversarial review iteration 2 * Refine claude-code backend spec after adversarial review iteration 3 * feat: recognize claude-code llm backend * feat: add ktx llm runtime port * feat: add claude-code llm runtime * feat: route non-agent llm calls through runtime * feat: run ingest agents through llm runtime * feat: support claude-code setup and status * test: verify claude-code backend runtime * docs: add claude-code backend v1 runtime plan * fix: close claude-code runtime isolation checks * fix: warn on claude-code prompt caching during setup * chore: verify claude-code v1 closure * docs: add claude-code backend v1 isolation closure plan * fix: update claude-code ingest setup guidance * docs: add claude-code backend v1 ingest guidance closure plan * docs: align claude-code isolation spec with sdk metadata * test: cover claude-code host discovery metadata * fix: tolerate claude-code host discovery metadata * docs: clarify claude-code host discovery metadata * docs: add claude-code auth-probe isolation fix plan * chore: prepare kaelio ktx rc1 release * chore: add semantic release workflow * fix: unblock ci checks * chore(release): 0.1.0-rc.1 * feat: add Claude Code model selection to setup * fix: keep git maintenance attached in local repos
2026-05-16 12:06:34 +02:00
// Runs the step with the backend given up front and prompts disabled, then checks the written
// `ktx.yaml` and that the injected auth probe was called once each for sonnet, haiku and opus.
it('configures Claude Code backend and validates local auth', async () => {
  const io = makeIo();
  const authProbe = vi.fn(async () => ({ ok: true as const }));
  const result = await runKtxSetupAnthropicModelStep(
    {
      projectDir: tempDir,
      inputMode: 'disabled',
      llmBackend: 'claude-code',
      skipLlm: false,
    },
    io.io,
    { claudeCodeAuthProbe: authProbe },
  );
  expect(result.status).toBe('ready');
  const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8'));
  expect(config.llm).toMatchObject({
    provider: { backend: 'claude-code' },
    models: claudeCodePreset,
  });
  // Three probes for six slots: the preset only names three distinct models.
  expect(authProbe).toHaveBeenCalledTimes(3);
  expect(authProbe).toHaveBeenNthCalledWith(1, expect.objectContaining({ projectDir: tempDir, model: 'sonnet' }));
  expect(authProbe).toHaveBeenNthCalledWith(2, expect.objectContaining({ projectDir: tempDir, model: 'haiku' }));
  expect(authProbe).toHaveBeenNthCalledWith(3, expect.objectContaining({ projectDir: tempDir, model: 'opus' }));
});
// Picks `claude-code` at the provider prompt and checks that no model prompt follows and that
// the written config carries the Claude Code preset.
it('does not prompt for a Claude Code model during interactive setup', async () => {
  const io = makeIo();
  const prompts = makePromptAdapter({ selectValues: ['claude-code'] });
  const authProbe = vi.fn(async () => ({ ok: true as const }));
  const result = await runKtxSetupAnthropicModelStep(
    { projectDir: tempDir, inputMode: 'auto', skipLlm: false },
    io.io,
    { prompts, claudeCodeAuthProbe: authProbe },
  );
  expect(result.status).toBe('ready');
  expect(prompts.select).toHaveBeenCalledWith(
    expect.objectContaining({
      message: expect.stringContaining('Which LLM provider should KTX use?'),
    }),
  );
  expect(prompts.select).not.toHaveBeenCalledWith(
    expect.objectContaining({
      message: expect.stringContaining('Which Claude Code model should KTX use?'),
    }),
  );
  const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8'));
  expect(config.llm.models).toMatchObject(claudeCodePreset);
});
feat: add codex llm backend for ktx runtime work (#253) * feat: add codex sdk runner foundation * feat: parse codex runtime events * feat: expose codex runtime mcp tools * feat: add codex llm runtime * feat: wire codex llm backend * test: avoid Array.fromAsync in codex runner test * docs: document codex llm backend * fix: tighten codex runtime config ownership * fix: use codex sdk env and thread options * fix: parse codex sdk event shapes * test: add codex backend live smoke * docs: clarify codex backend isolation * fix: drive codex loop metrics from mcp events * fix: enforce codex local step budget * docs: disclose codex isolation limits * fix: count all codex agent steps and stream step callbacks live The agent-loop step budget only counted completed mcp_tool_call items, so built-in command_execution steps (which the public Codex SDK/CLI surface can still expose) never decremented the budget, letting ingest/reconciliation run past stepBudget until Codex stopped on its own. onStepFinish was also replayed only after the whole stream drained, so live work_unit_step / reconciliation progress appeared stuck until the Codex process exited. collectEvents is now the single live step accumulator: it counts every completed agent-action item via a shared isCompletedAgentStep predicate (command_execution, mcp_tool_call, file_change, web_search), fires onStepFinish as each step completes, and enforces the budget on that broader count. A no-tool turn still counts as one step. toolFailures stays MCP-specific, since a non-zero command exit is normal agent exploration, not a loop failure. * test: align ingest llm-guard assertions with codex backend The skip-llm ingest guard message now lists codex as a valid backend and mentions a Claude Code/Codex session plus a codex setup hint, but this slow suite test still asserted the pre-codex wording. Update it to match the production message (already covered by the local-bundle-runtime unit test) and add the codex setup-line assertion. 
* fix: treat codex error:null tool calls as success The Codex SDK serializes error: null on successful mcp_tool_call items, so the failure check (item.error !== undefined) flagged every successful tool call as failed with the empty-payload default "Codex turn failed". This killed every ingest work unit under the codex backend before it could produce a patch. Key on status === 'failed' (authoritative, always set) and only treat a populated error object as a failure. Add a regression test built from a verbatim real-SDK event capture. * fix: default codex backend to gpt-5.5 and report real probe errors The previous default gpt-5.3-codex is an API-key-only model that the OpenAI API rejects under ChatGPT-account (subscription) auth, so codex status/setup failed with a misleading "authentication is not usable" message even though auth was fine. - Default codex model is now gpt-5.5 (works on both subscription and API-key auth); the curated setup picker offers gpt-5.5 / gpt-5.4 / gpt-5.4-mini and keeps free-form entry for account-specific ids (e.g. gpt-5.3-codex-spark). - runCodexAuthProbe now distinguishes "model not available" from an auth failure and surfaces the real API error: collectEvents retains stream events when the SDK throws on a non-zero exit, and the API error JSON envelope is unwrapped to its human-readable message. - The Codex isolation warning now renders inside the clack setup frame. - Docs updated to gpt-5.5 with a note that *-codex ids require API-key auth. * fix: require llm.models.default in status and match codex probe remediation Status reported a project ready when a non-none LLM backend was configured without llm.models.default, but the runtime (resolveModelSlots) hard-requires it, so ingest/scan/memory threw after `ktx status` said the project was usable. buildLlmStatus now fails for any non-none backend missing models.default and no longer invents a fallback model for claude-code/codex. 
Codex probe failures now carry a category-matched fix: a model-access failure steers the user at llm.models.default instead of the auth/install remediation. runCodexAuthProbe returns the fix and status consumes it; the message stays self-sufficient so setup output is unchanged. Docs: README now lists the codex backend and local Codex auth; ktx-setup.mdx states --llm-model only accepts codex/default or gpt-*/codex-* ids. Repaired four doctor fixtures that configured a backend without models.default (the now-correctly-blocked config) and added coverage for the new behavior.
2026-06-02 13:57:11 +02:00
it('configures Codex backend and validates local auth', async () => {
  // Non-interactive Codex setup with a stubbed, always-successful auth probe:
  // verifies the persisted config, the probe invocation, and the stderr warning.
  const io = makeIo();
  const codexAuthProbe = vi.fn(async () => ({ ok: true as const }));

  const result = await runKtxSetupAnthropicModelStep(
    {
      projectDir: tempDir,
      inputMode: 'disabled',
      llmBackend: 'codex',
      skipLlm: false,
    },
    io.io,
    { codexAuthProbe },
  );
  expect(result.status).toBe('ready');

  // ktx.yaml must record the codex backend together with the codex model preset.
  const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8'));
  expect(config.llm).toMatchObject({
    provider: { backend: 'codex' },
    models: codexPreset,
  });

  // The probe runs exactly once, for this project dir and the gpt-5.5 model.
  expect(codexAuthProbe).toHaveBeenCalledTimes(1);
  expect(codexAuthProbe).toHaveBeenCalledWith(expect.objectContaining({ projectDir: tempDir, model: 'gpt-5.5' }));

  // The warning carries the clack gutter so it renders inside the setup frame.
  expect(io.stderr()).toContain('│ Codex backend isolation is limited');
  expect(io.stderr()).toContain('may still load user Codex config');
});
it('defaults the Codex model to gpt-5.5 when none is provided non-interactively', async () => {
  // No model is passed in the step arguments, so the probe call below shows
  // which model the step picked on its own.
  const io = makeIo();
  const codexAuthProbe = vi.fn(async () => ({ ok: true as const }));

  const result = await runKtxSetupAnthropicModelStep(
    {
      projectDir: tempDir,
      inputMode: 'disabled',
      llmBackend: 'codex',
      skipLlm: false,
    },
    io.io,
    { codexAuthProbe },
  );
  expect(result.status).toBe('ready');

  // The persisted config carries the codex backend and the codex model preset.
  const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8'));
  expect(config.llm).toMatchObject({
    provider: { backend: 'codex' },
    models: codexPreset,
  });

  // Exactly one probe call, and it targets gpt-5.5.
  expect(codexAuthProbe).toHaveBeenCalledTimes(1);
  expect(codexAuthProbe).toHaveBeenCalledWith(expect.objectContaining({ projectDir: tempDir, model: 'gpt-5.5' }));
});
feat: add claude-code llm backend with runtime port (#115) * docs: revise claude-code ingest backend spec * docs: keep claude-code spec focused on ingest * docs: expand claude-code spec to full llm parity * Refine claude-code backend spec after adversarial review iteration 1 * Refine claude-code backend spec after adversarial review iteration 2 * Refine claude-code backend spec after adversarial review iteration 3 * feat: recognize claude-code llm backend * feat: add ktx llm runtime port * feat: add claude-code llm runtime * feat: route non-agent llm calls through runtime * feat: run ingest agents through llm runtime * feat: support claude-code setup and status * test: verify claude-code backend runtime * docs: add claude-code backend v1 runtime plan * fix: close claude-code runtime isolation checks * fix: warn on claude-code prompt caching during setup * chore: verify claude-code v1 closure * docs: add claude-code backend v1 isolation closure plan * fix: update claude-code ingest setup guidance * docs: add claude-code backend v1 ingest guidance closure plan * docs: align claude-code isolation spec with sdk metadata * test: cover claude-code host discovery metadata * fix: tolerate claude-code host discovery metadata * docs: clarify claude-code host discovery metadata * docs: add claude-code auth-probe isolation fix plan * chore: prepare kaelio ktx rc1 release * chore: add semantic release workflow * fix: unblock ci checks * chore(release): 0.1.0-rc.1 * feat: add Claude Code model selection to setup * fix: keep git maintenance attached in local repos
2026-05-16 12:06:34 +02:00
it('warns during Claude Code setup when existing prompt-caching fields will be ignored', async () => {
  // Seed an existing Anthropic config that sets prompt-caching TTLs. The keys
  // must be nested (llm -> promptCaching -> systemTtl) for the warning asserted
  // below to refer to a field that actually exists in the parsed config.
  await writeFile(
    join(tempDir, 'ktx.yaml'),
    [
      'llm:',
      '  provider:',
      '    backend: anthropic',
      '  models:',
      '    default: claude-sonnet-4-6',
      '  promptCaching:',
      '    enabled: true',
      '    systemTtl: 1h',
      '    toolsTtl: 1h',
      '    historyTtl: 5m',
      '',
    ].join('\n'),
    'utf-8',
  );

  const io = makeIo();
  const result = await runKtxSetupAnthropicModelStep(
    {
      projectDir: tempDir,
      inputMode: 'disabled',
      llmBackend: 'claude-code',
      skipLlm: false,
    },
    io.io,
    {
      // Stubbed probe: auth always succeeds, so only the warning is under test.
      claudeCodeAuthProbe: async () => ({ ok: true as const }),
    },
  );

  // Setup still succeeds; the ignored fields are only reported on stderr.
  expect(result.status).toBe('ready');
  expect(io.stderr()).toContain('claude-code ignores llm.promptCaching.systemTtl');
  expect(io.stderr()).toContain('Claude Agent SDK does not expose KTX prompt-cache TTL, tool, or history markers');
});
it('returns from Anthropic credential Back to provider selection', async () => {
  // Scripted select answers: choose 'anthropic', then answer 'back' twice.
  const promptAdapter = makePromptAdapter({ selectValues: ['anthropic', 'back', 'back'] });
  const { io } = makeIo();

  const outcome = await runKtxSetupAnthropicModelStep(
    { projectDir: tempDir, inputMode: 'auto', skipLlm: false },
    io,
    { prompts: promptAdapter, env: {} },
  );

  expect(outcome.status).toBe('back');

  // The third select prompt shown must be the provider question.
  const providerQuestion = expect.objectContaining({
    message: expect.stringContaining('Which LLM provider should KTX use?'),
  });
  expect(promptAdapter.select).toHaveBeenNthCalledWith(3, providerQuestion);
});
2026-05-10 23:12:26 +02:00
it('configures env credentials, selected model, prompt caching, and llm completion state', async () => {
  // Non-interactive Anthropic setup using an env-var API key reference and a
  // health check that always passes.
  const io = makeIo();
  const { events: spinnerEvents, spinner } = makeSpinnerEvents();

  const result = await runKtxSetupAnthropicModelStep(
    {
      projectDir: tempDir,
      inputMode: 'disabled',
      anthropicApiKeyEnv: 'ANTHROPIC_API_KEY', // pragma: allowlist secret
      skipLlm: false,
    },
    io.io,
    {
      env: { ANTHROPIC_API_KEY: 'sk-ant-test' }, // pragma: allowlist secret
      healthCheck: vi.fn(async () => ({ ok: true as const })),
      spinner,
    },
  );
  expect(result.status).toBe('ready');

  // The config stores an env reference to the key, not the key value itself.
  const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8'));
  expect(config.llm).toMatchObject({
    provider: {
      backend: 'anthropic',
      anthropic: { api_key: 'env:ANTHROPIC_API_KEY' }, // pragma: allowlist secret
    },
    models: anthropicPreset,
    promptCaching: { enabled: true },
  });
  expect(config.scan.enrichment.mode).toBe('llm');

  // Step completion is tracked in the setup state, not written into ktx.yaml.
  expect(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')).not.toContain('completed_steps:');
  expect((await readKtxSetupState(tempDir)).completed_steps).toContain('llm');

  // One start/stop spinner pair per model that gets health-checked.
  expect(spinnerEvents).toEqual([
    'start:Checking Anthropic API LLM (claude-sonnet-4-6).',
    'stop:LLM test passed (Anthropic API, claude-sonnet-4-6)',
    'start:Checking Anthropic API LLM (claude-haiku-4-5).',
    'stop:LLM test passed (Anthropic API, claude-haiku-4-5)',
    'start:Checking Anthropic API LLM (claude-opus-4-7).',
    'stop:LLM test passed (Anthropic API, claude-opus-4-7)',
  ]);

  // Readiness is reported, and the raw API key never reaches stdout.
  expect(io.stdout()).toContain('LLM ready: yes');
  expect(io.stdout()).not.toContain('sk-ant-test');
});
it('degrades unavailable Anthropic non-anchor models to the anchor before persisting', async () => {
  const io = makeIo();
  const { events: spinnerEvents, spinner } = makeSpinnerEvents();

  // Health checks resolve in call order: pass, fail ('model not enabled'), pass.
  const healthCheck = vi.fn();
  healthCheck.mockResolvedValueOnce({ ok: true as const });
  healthCheck.mockResolvedValueOnce({ ok: false as const, message: 'model not enabled' });
  healthCheck.mockResolvedValueOnce({ ok: true as const });

  const outcome = await runKtxSetupAnthropicModelStep(
    {
      projectDir: tempDir,
      inputMode: 'disabled',
      anthropicApiKeyEnv: 'ANTHROPIC_API_KEY', // pragma: allowlist secret
      skipLlm: false,
    },
    io.io,
    {
      env: { ANTHROPIC_API_KEY: 'sk-ant-test' }, // pragma: allowlist secret
      healthCheck,
      spinner,
    },
  );
  expect(outcome.status).toBe('ready');

  // Persisted role mapping: triage and repair use claude-sonnet-4-6, while
  // curator and reconcile keep claude-opus-4-7.
  const yamlText = await readFile(join(tempDir, 'ktx.yaml'), 'utf-8');
  const persisted = parseKtxProjectConfig(yamlText);
  const expectedModels = {
    default: 'claude-sonnet-4-6',
    triage: 'claude-sonnet-4-6',
    candidateExtraction: 'claude-sonnet-4-6',
    curator: 'claude-opus-4-7',
    reconcile: 'claude-opus-4-7',
    repair: 'claude-sonnet-4-6',
  };
  expect(persisted.llm.models).toMatchObject(expectedModels);

  // The substitution is announced on stderr.
  const degradeNotice =
    'LLM model claude-haiku-4-5 is unavailable for triage, repair; using claude-sonnet-4-6 for those roles.';
  expect(io.stderr()).toContain(degradeNotice);

  // The spinner shows the failed check in the middle of two passing ones.
  const expectedSpinnerTrace = [
    'start:Checking Anthropic API LLM (claude-sonnet-4-6).',
    'stop:LLM test passed (Anthropic API, claude-sonnet-4-6)',
    'start:Checking Anthropic API LLM (claude-haiku-4-5).',
    'error:LLM test failed',
    'start:Checking Anthropic API LLM (claude-opus-4-7).',
    'stop:LLM test passed (Anthropic API, claude-opus-4-7)',
  ];
  expect(spinnerEvents).toEqual(expectedSpinnerTrace);
});
it('configures Vertex AI provider, selected model, prompt caching, and llm completion state', async () => {
  const io = makeIo();
  const healthCheck = vi.fn(async () => ({ ok: true as const }));
  const { events: spinnerEvents, spinner } = makeSpinnerEvents();

  const outcome = await runKtxSetupAnthropicModelStep(
    {
      projectDir: tempDir,
      inputMode: 'disabled',
      llmBackend: 'vertex',
      vertexProject: 'local-gcp-project',
      vertexLocation: 'us-east5',
      skipLlm: false,
    },
    io.io,
    { env: {}, healthCheck, spinner },
  );
  expect(outcome.status).toBe('ready');

  // Each model is health-checked in this order with the same Vertex target
  // and prompt-caching settings; only the model slot differs between calls.
  const vertexTarget = { project: 'local-gcp-project', location: 'us-east5' };
  const cachingSettings = { enabled: true, vertexFallbackTo5m: true };
  const checkedModels = ['claude-sonnet-4-6', 'claude-haiku-4-5', 'claude-opus-4-7'];
  checkedModels.forEach((model, index) => {
    expect(healthCheck).toHaveBeenNthCalledWith(index + 1, {
      backend: 'vertex',
      vertex: vertexTarget,
      modelSlots: { default: model },
      promptCaching: cachingSettings,
    });
  });

  // The same provider and caching settings end up in ktx.yaml.
  const yamlPath = join(tempDir, 'ktx.yaml');
  const persisted = parseKtxProjectConfig(await readFile(yamlPath, 'utf-8'));
  expect(persisted.llm).toMatchObject({
    provider: {
      backend: 'vertex',
      vertex: vertexTarget,
    },
    models: anthropicPreset,
    promptCaching: cachingSettings,
  });
  expect(persisted.scan.enrichment.mode).toBe('llm');

  // Step completion is recorded in the setup state rather than in ktx.yaml.
  const yamlAfterSetup = await readFile(yamlPath, 'utf-8');
  expect(yamlAfterSetup).not.toContain('completed_steps:');
  const setupState = await readKtxSetupState(tempDir);
  expect(setupState.completed_steps).toContain('llm');

  expect(spinnerEvents).toEqual([
    'start:Checking Vertex AI LLM (claude-sonnet-4-6).',
    'stop:LLM test passed (Vertex AI, claude-sonnet-4-6)',
    'start:Checking Vertex AI LLM (claude-haiku-4-5).',
    'stop:LLM test passed (Vertex AI, claude-haiku-4-5)',
    'start:Checking Vertex AI LLM (claude-opus-4-7).',
    'stop:LLM test passed (Vertex AI, claude-opus-4-7)',
  ]);
  expect(io.stdout()).toContain('LLM ready: yes (claude-sonnet-4-6)');
});
it('uses existing Vertex AI credentials without an extra auth choice', async () => {
  const io = makeIo();
  const promptAdapter = makePromptAdapter({ selectValues: ['vertex', 'local-gcp-project'] });
  // gcloud stubs: a current project plus a two-entry project listing.
  const readGcloudProject = vi.fn(async () => 'local-gcp-project');
  const listGcloudProjects = vi.fn(async () => [
    { projectId: 'local-gcp-project', name: 'Local project' },
    { projectId: 'other-gcp-project', name: 'Other project' },
  ]);
  const healthCheck = vi.fn(async () => ({ ok: true as const }));

  const outcome = await runKtxSetupAnthropicModelStep(
    { projectDir: tempDir, inputMode: 'auto', skipLlm: false },
    io.io,
    {
      prompts: promptAdapter,
      env: {},
      readGcloudProject,
      listGcloudProjects,
      healthCheck,
    },
  );
  expect(outcome.status).toBe('ready');

  // No select prompt may ask how to authenticate with Vertex AI.
  const authQuestion = expect.objectContaining({
    message: expect.stringContaining('How should KTX authenticate with Google Vertex AI?'),
  });
  expect(promptAdapter.select).not.toHaveBeenCalledWith(authQuestion);

  // Both gcloud lookups are consulted, and no free-text prompt is shown.
  expect(readGcloudProject).toHaveBeenCalled();
  expect(listGcloudProjects).toHaveBeenCalled();
  expect(promptAdapter.text).not.toHaveBeenCalled();

  // The project picker lists the gcloud projects, then the manual and Back entries.
  const expectedProjectOptions = [
    { value: 'local-gcp-project', label: 'local-gcp-project - Local project (current gcloud project)' },
    { value: 'other-gcp-project', label: 'other-gcp-project - Other project' },
    { value: 'manual', label: 'Enter a project ID manually' },
    { value: 'back', label: 'Back' },
  ];
  expect(promptAdapter.autocomplete).toHaveBeenCalledWith(
    expect.objectContaining({
      message: expect.stringContaining('Which Google Cloud project should KTX use for Vertex AI?'),
      options: expectedProjectOptions,
    }),
  );

  const vertexTarget = { project: 'local-gcp-project', location: 'us-east5' };
  expect(healthCheck).toHaveBeenCalledWith({
    backend: 'vertex',
    vertex: vertexTarget,
    modelSlots: { default: 'claude-sonnet-4-6' },
    promptCaching: { enabled: true, vertexFallbackTo5m: true },
  });

  const yamlText = await readFile(join(tempDir, 'ktx.yaml'), 'utf-8');
  const persisted = parseKtxProjectConfig(yamlText);
  expect(persisted.llm.provider).toMatchObject({
    backend: 'vertex',
    vertex: vertexTarget,
  });
});
it('skips the Vertex AI auth choice when Application Default Credentials are the only option', async () => {
  const io = makeIo();
  const promptAdapter = makePromptAdapter({ selectValues: ['vertex', 'local-gcp-project'] });
  const healthCheck = vi.fn(async () => ({ ok: true as const }));

  const outcome = await runKtxSetupAnthropicModelStep(
    { projectDir: tempDir, inputMode: 'auto', skipLlm: false },
    io.io,
    {
      prompts: promptAdapter,
      env: {},
      // gcloud stubs: one current project, which is also the only listed one.
      readGcloudProject: vi.fn(async () => 'local-gcp-project'),
      listGcloudProjects: vi.fn(async () => [{ projectId: 'local-gcp-project', name: 'Local project' }]),
      healthCheck,
    },
  );
  expect(outcome.status).toBe('ready');

  // The authentication question is never asked via select...
  const authQuestion = expect.objectContaining({
    message: expect.stringContaining('How should KTX authenticate with Google Vertex AI?'),
  });
  expect(promptAdapter.select).not.toHaveBeenCalledWith(authQuestion);

  // ...while the project question is asked via autocomplete.
  const projectQuestion = expect.objectContaining({
    message: expect.stringContaining('Which Google Cloud project should KTX use for Vertex AI?'),
  });
  expect(promptAdapter.autocomplete).toHaveBeenCalledWith(projectQuestion);

  const expectedCheckInput = expect.objectContaining({
    backend: 'vertex',
    vertex: { project: 'local-gcp-project', location: 'us-east5' },
  });
  expect(healthCheck).toHaveBeenCalledWith(expectedCheckInput);
});
it('lets users choose a different visible gcloud project for Vertex AI', async () => {
  const io = makeIo();
  // The scripted answer picks 'other-gcp-project', not the current gcloud project.
  const promptAdapter = makePromptAdapter({ selectValues: ['vertex', 'other-gcp-project'] });
  const healthCheck = vi.fn(async () => ({ ok: true as const }));

  const outcome = await runKtxSetupAnthropicModelStep(
    { projectDir: tempDir, inputMode: 'auto', skipLlm: false },
    io.io,
    {
      prompts: promptAdapter,
      env: {},
      readGcloudProject: vi.fn(async () => 'current-gcp-project'),
      listGcloudProjects: vi.fn(async () => [
        { projectId: 'current-gcp-project', name: 'Current project' },
        { projectId: 'other-gcp-project', name: 'Other project' },
      ]),
      healthCheck,
    },
  );
  expect(outcome.status).toBe('ready');

  // Both the health check and the persisted config use the chosen project.
  const chosenTarget = { project: 'other-gcp-project', location: 'us-east5' };
  expect(healthCheck).toHaveBeenCalledWith({
    backend: 'vertex',
    vertex: chosenTarget,
    modelSlots: { default: 'claude-sonnet-4-6' },
    promptCaching: { enabled: true, vertexFallbackTo5m: true },
  });

  const yamlText = await readFile(join(tempDir, 'ktx.yaml'), 'utf-8');
  const persisted = parseKtxProjectConfig(yamlText);
  expect(persisted.llm.provider).toMatchObject({
    backend: 'vertex',
    vertex: chosenTarget,
  });
});
// With no current gcloud project and an empty project list, the picker should offer only
// "manual" and "back", and a manually typed project ID should flow into the health check.
it('allows manual Vertex AI project entry when gcloud project listing is empty', async () => {
  const terminal = makeIo();
  const promptAdapter = makePromptAdapter({
    selectValues: ['vertex', 'manual'],
    textValues: ['manual-gcp-project'],
  });
  const checkHealth = vi.fn(async () => ({ ok: true as const }));

  const outcome = await runKtxSetupAnthropicModelStep(
    { projectDir: tempDir, inputMode: 'auto', skipLlm: false },
    terminal.io,
    {
      prompts: promptAdapter,
      env: {},
      readGcloudProject: vi.fn(async () => undefined),
      listGcloudProjects: vi.fn(async () => []),
      healthCheck: checkHealth,
    },
  );

  expect(outcome.status).toBe('ready');

  const pickerWithManualOnly = expect.objectContaining({
    message: expect.stringContaining('Which Google Cloud project should KTX use for Vertex AI?'),
    options: [
      { value: 'manual', label: 'Enter a project ID manually' },
      { value: 'back', label: 'Back' },
    ],
  });
  expect(promptAdapter.autocomplete).toHaveBeenCalledWith(pickerWithManualOnly);

  const manualEntryPrompt = expect.objectContaining({
    message: 'Google Cloud project ID\n│ Press Escape to go back.\n│',
  });
  expect(promptAdapter.text).toHaveBeenCalledWith(manualEntryPrompt);

  const typedProject = expect.objectContaining({
    vertex: { project: 'manual-gcp-project', location: 'us-east5' },
  });
  expect(checkHealth).toHaveBeenCalledWith(typedProject);
});
// The first project listing rejects with a gcloud reauthentication error; after the user picks
// "retry" the second listing succeeds and a project can be selected.
it('lets users retry Vertex AI project listing after gcloud auth fails', async () => {
  const terminal = makeIo();
  const promptAdapter = makePromptAdapter({ selectValues: ['vertex', 'retry', 'other-gcp-project'] });

  const listProjects = vi.fn();
  listProjects.mockRejectedValueOnce(
    new Error('Reauthentication failed. cannot prompt during non-interactive execution.'),
  );
  listProjects.mockResolvedValueOnce([
    { projectId: 'local-gcp-project', name: 'Local project' },
    { projectId: 'other-gcp-project', name: 'Other project' },
  ]);
  const checkHealth = vi.fn(async () => ({ ok: true as const }));

  const outcome = await runKtxSetupAnthropicModelStep(
    { projectDir: tempDir, inputMode: 'auto', skipLlm: false },
    terminal.io,
    {
      prompts: promptAdapter,
      env: {},
      readGcloudProject: vi.fn(async () => 'local-gcp-project'),
      listGcloudProjects: listProjects,
      healthCheck: checkHealth,
    },
  );

  expect(outcome.status).toBe('ready');
  expect(listProjects).toHaveBeenCalledTimes(2);

  // ESC[33m is the ANSI yellow foreground sequence; the warning lines are expected to carry it.
  const yellow = `${String.fromCharCode(0x1b)}[33m`;

  // The failure prompt names the problem and offers a retry option.
  expect(promptAdapter.autocomplete).toHaveBeenCalledWith(
    expect.objectContaining({
      message: expect.stringContaining('Could not list Google Cloud projects with gcloud'),
      options: expect.arrayContaining([{ value: 'retry', label: 'Retry loading Google Cloud projects' }]),
    }),
  );

  // Each remaining fragment must appear in the message of some autocomplete call, checked in
  // this order: coloured failure line, remediation command, coloured remediation line.
  const expectedMessageFragments = [
    `${yellow}Could not list Google Cloud projects with gcloud`,
    'gcloud auth login --update-adc',
    `${yellow}Run \`gcloud auth login --update-adc\``,
  ];
  for (const fragment of expectedMessageFragments) {
    expect(promptAdapter.autocomplete).toHaveBeenCalledWith(
      expect.objectContaining({ message: expect.stringContaining(fragment) }),
    );
  }

  expect(checkHealth).toHaveBeenCalledWith(
    expect.objectContaining({
      vertex: { project: 'other-gcp-project', location: 'us-east5' },
    }),
  );
});
// Choosing "back" in the project picker should re-show the provider question; choosing
// "back" again there ends the step with status 'back'.
it('returns from Vertex AI project selection Back to provider selection', async () => {
  const promptAdapter = makePromptAdapter({ selectValues: ['vertex', 'back', 'back'] });
  const terminal = makeIo();

  const outcome = await runKtxSetupAnthropicModelStep(
    { projectDir: tempDir, inputMode: 'auto', skipLlm: false },
    terminal.io,
    {
      prompts: promptAdapter,
      env: {},
      readGcloudProject: vi.fn(async () => 'current-gcp-project'),
      listGcloudProjects: vi.fn(async () => [{ projectId: 'current-gcp-project', name: 'Current project' }]),
    },
  );

  expect(outcome.status).toBe('back');

  // The second `select` call is the provider question being asked again.
  const providerQuestion = expect.objectContaining({
    message: expect.stringContaining('Which LLM provider should KTX use?'),
  });
  expect(promptAdapter.select).toHaveBeenNthCalledWith(2, providerQuestion);
});
// A bare "Forbidden" from the Vertex health check should be expanded on stderr into the
// likely causes: the project in use, API enablement, model access, and the IAM role.
it('explains common Vertex AI Forbidden health-check causes', async () => {
  const terminal = makeIo();
  const forbiddenHealthCheck = vi.fn(async () => ({ ok: false as const, message: 'Forbidden' }));

  const outcome = await runKtxSetupAnthropicModelStep(
    {
      projectDir: tempDir,
      inputMode: 'disabled',
      llmBackend: 'vertex',
      vertexProject: 'kaelio-orbit-looker-20260430',
      vertexLocation: 'us-east5',
      skipLlm: false,
    },
    terminal.io,
    { env: {}, healthCheck: forbiddenHealthCheck },
  );

  expect(outcome.status).toBe('failed');

  const expectedHints = [
    'project kaelio-orbit-looker-20260430',
    'Vertex AI API is enabled',
    'Anthropic Claude model access',
    'roles/aiplatform.user',
  ];
  for (const hint of expectedHints) {
    expect(terminal.stderr()).toContain(hint);
  }
});
2026-05-10 23:12:26 +02:00
// A key supplied via --anthropic-api-key-file is read for the health checks, but ktx.yaml must
// only store a `file:` reference to it and the raw key must never reach stdout.
it('resolves --anthropic-api-key-file for health checks and stores a file reference', async () => {
  const io = makeIo();
  const secretPath = join(tempDir, 'anthropic-api-key');
  await writeFile(secretPath, 'sk-ant-file', 'utf-8'); // pragma: allowlist secret
  const healthCheck = vi.fn(async () => ({ ok: true as const }));
  const result = await runKtxSetupAnthropicModelStep(
    {
      projectDir: tempDir,
      inputMode: 'disabled',
      anthropicApiKeyFile: secretPath,
      skipLlm: false,
    },
    io.io,
    { env: {}, healthCheck },
  );
  expect(result.status).toBe('ready');
  // Three health checks are expected, one per model, each using the key read from the file.
  expect(healthCheck).toHaveBeenNthCalledWith(
    1,
    expect.objectContaining({
      anthropic: { apiKey: 'sk-ant-file' }, // pragma: allowlist secret
      modelSlots: { default: 'claude-sonnet-4-6' },
    }),
  );
  expect(healthCheck).toHaveBeenNthCalledWith(
    2,
    expect.objectContaining({
      anthropic: { apiKey: 'sk-ant-file' }, // pragma: allowlist secret
      modelSlots: { default: 'claude-haiku-4-5' },
    }),
  );
  expect(healthCheck).toHaveBeenNthCalledWith(
    3,
    expect.objectContaining({
      anthropic: { apiKey: 'sk-ant-file' }, // pragma: allowlist secret
      modelSlots: { default: 'claude-opus-4-7' },
    }),
  );
  const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8'));
  expect(config.llm).toMatchObject({
    provider: {
      backend: 'anthropic',
      anthropic: { api_key: `file:${secretPath}` }, // pragma: allowlist secret
    },
    models: anthropicPreset,
  });
  // Step completion is recorded in the setup state, not in ktx.yaml.
  expect(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')).not.toContain('completed_steps:');
  expect((await readKtxSetupState(tempDir)).completed_steps).toContain('llm');
  expect(io.stdout()).not.toContain('sk-ant-file');
});
// Pointing --anthropic-api-key-file at a path that does not exist must stop the step before
// any health check runs and name the missing path on stderr.
it('returns missing-input when --anthropic-api-key-file points to a missing file', async () => {
  const io = makeIo();
  const missingSecretPath = join(tempDir, 'missing-anthropic-api-key');
  const healthCheck = vi.fn(async () => ({ ok: true as const }));
  const result = await runKtxSetupAnthropicModelStep(
    {
      projectDir: tempDir,
      inputMode: 'disabled',
      anthropicApiKeyFile: missingSecretPath,
      skipLlm: false,
    },
    io.io,
    { env: {}, healthCheck },
  );
  expect(result.status).toBe('missing-input');
  expect(healthCheck).not.toHaveBeenCalled();
  expect(io.stderr()).toContain(`Missing Anthropic API key file: ${missingSecretPath}`);
});
fix(cli): clear error when ktx setup has no LLM backend under --no-input (#281) * fix(cli): fail clearly when ktx setup has no LLM backend under --no-input Non-interactive `ktx setup` silently defaulted the LLM backend to `anthropic` and then failed with `Missing Anthropic API key: pass --anthropic-api-key-env or --anthropic-api-key-file` — confusing for users who selected a different provider (e.g. `--target claude-code`) and never asked for the Anthropic API backend. That silent default could never succeed: it was reached only when no backend, Anthropic key, or Vertex flag was supplied, and in exactly that case the Anthropic credential resolver always failed (no env fallback in disabled mode). Unlike embeddings, the LLM has no credential-free default (anthropic needs a key, vertex needs gcloud ADC, claude-code/codex need a logged-in local CLI), so there is nothing safe to assume. `chooseBackend` now fails clearly in disabled mode with no backend, naming the (hidden) `--llm-backend` flag and its choices and noting each backend's credential needs. `--llm-backend` stays hidden in `--help`, consistent with the rest of the documented automation surface; the error message is the discovery path. - Add a unit test (no backend, disabled -> clear message) and a CLI/integration test (`--target claude-code --no-input` -> exit 1, clear message, not the Anthropic red herring). - Document the no-default behavior and add a Common-errors row in docs-site ktx-setup.mdx. * refactor(cli): single source of truth for setup LLM backends The set of LLM backends a user can pick during `ktx setup` (claude-code, codex, anthropic, vertex) was hand-enumerated in five places: the `--llm-backend` arg parser, the `KtxSetupLlmBackend` union, the interactive prompt's narrowing, the prompt options, and the missing-backend error. Only some had TypeScript coverage, so adding a backend could silently drift (e.g. a valid value rejected by the parser, or routed to anthropic by the prompt's `? 
: 'anthropic'` fallback). Collapse them onto one `KTX_SETUP_LLM_BACKENDS` list: - `KtxSetupLlmBackend` is derived from it. - `isKtxSetupLlmBackend` is the shared validator; the arg parser and the prompt both route through it instead of re-listing literals. - The prompt options derive from the list, with a `Record<KtxSetupLlmBackend, string>` label map so a new backend fails to compile until it has a label. - The missing-backend error builds its choice list from the same source. Behavior-preserving: identical accepted values and parse error, identical prompt options (asserted by an existing test), and the prompt's unreachable fallback now cancels rather than silently assuming anthropic.
2026-06-09 19:11:39 +02:00
// With input disabled and no backend given, the step must report the missing backend itself
// rather than falling through to an Anthropic credential error.
it('fails clearly when non-interactive setup has no LLM backend instead of assuming Anthropic', async () => {
  const io = makeIo();
  const result = await runKtxSetupAnthropicModelStep(
    { projectDir: tempDir, inputMode: 'disabled', skipLlm: false },
    io.io,
  );
  expect(result.status).toBe('missing-input');
  const stderr = io.stderr();
  expect(stderr).toContain('Missing LLM backend: pass --llm-backend');
  // Names every backend so the user can choose without reading hidden --help flags.
  expect(stderr).toContain('claude-code');
  expect(stderr).toContain('codex');
  expect(stderr).toContain('anthropic');
  expect(stderr).toContain('vertex');
  // Does not mislead with an Anthropic-key error the user never opted into.
  expect(stderr).not.toContain('Missing Anthropic API key');
  // Does not nudge users to skip the LLM.
  expect(stderr).not.toContain('--skip-llm');
});
2026-05-10 23:51:24 +02:00
// A pasted key is written to .ktx/secrets/anthropic-api-key with a trailing newline, while
// ktx.yaml holds only a `file:` reference and the key never appears on stdout.
it('writes pasted keys to .ktx/secrets and never prints the key', async () => {
  const io = makeIo();
  const prompts = makePromptAdapter({
    credentialChoice: 'paste',
    passwordValue: 'sk-ant-pasted', // pragma: allowlist secret
  });
  const result = await runKtxSetupAnthropicModelStep(
    { projectDir: tempDir, inputMode: 'auto', skipLlm: false },
    io.io,
    {
      prompts,
      env: {},
      healthCheck: vi.fn(async () => ({ ok: true as const })),
    },
  );
  expect(result.status).toBe('ready');
  await expect(readFile(join(tempDir, '.ktx/secrets/anthropic-api-key'), 'utf-8')).resolves.toBe('sk-ant-pasted\n'); // pragma: allowlist secret
  // The owner-only permission check is skipped on Windows.
  if (process.platform !== 'win32') {
    expect((await stat(join(tempDir, '.ktx/secrets/anthropic-api-key'))).mode & 0o777).toBe(0o600);
  }
  const yaml = await readFile(join(tempDir, 'ktx.yaml'), 'utf-8');
  expect(yaml).toContain('api_key: file:');
  expect(yaml).not.toContain('sk-ant-pasted');
  expect(io.stdout()).not.toContain('sk-ant-pasted');
});
// Choosing "paste" should go straight to the password prompt, with no intermediate
// "Paste Anthropic API key now?" confirmation, and the prompt should mention Escape.
it('opens pasted key entry directly and tells users Escape goes back', async () => {
  const prompts = makePromptAdapter({
    selectValues: ['paste'],
    passwordValue: 'sk-ant-pasted', // pragma: allowlist secret
  });
  const result = await runKtxSetupAnthropicModelStep(
    { projectDir: tempDir, inputMode: 'auto', skipLlm: false },
    makeIo().io,
    {
      prompts,
      env: {},
      healthCheck: vi.fn(async () => ({ ok: true as const })),
    },
  );
  expect(result.status).toBe('ready');
  expect(prompts.select).not.toHaveBeenCalledWith(expect.objectContaining({ message: 'Paste Anthropic API key now?' }));
  expect(prompts.password).toHaveBeenCalledWith({
    message: 'Anthropic API key\n│ Press Escape to go back.\n│',
  });
});
// The credential-source prompt must not include a 'skip' option.
it('does not offer skipping while choosing an Anthropic credential source', async () => {
  const prompts = makePromptAdapter({ credentialChoice: 'back' });
  const result = await runKtxSetupAnthropicModelStep(
    { projectDir: tempDir, inputMode: 'auto', skipLlm: false },
    makeIo().io,
    { prompts, env: {} },
  );
  expect(result.status).toBe('back');
  expect(prompts.select).toHaveBeenCalledWith(
    expect.objectContaining({
      message: expect.stringContaining('How should KTX find your Anthropic API key?'),
      options: expect.not.arrayContaining([expect.objectContaining({ value: 'skip' })]),
    }),
  );
});
2026-05-10 23:51:24 +02:00
// The rationale for requesting the key belongs in the prompt message itself (question, blank
// line, then one explanatory paragraph) and must not be printed separately to stdout.
it('explains why KTX asks for an Anthropic API key', async () => {
  const io = makeIo();
  const prompts = makePromptAdapter({ credentialChoice: 'back' });
  const expectedPromptMessage = [
    'How should KTX find your Anthropic API key?',
    '',
    // The paragraph is split only for source line length; it is joined with single spaces.
    [
      'KTX uses the key to verify Anthropic model access now and to run ingest agents that turn schemas, SQL,',
      'BI metadata, and docs into semantic-layer sources and wiki context. ktx.yaml stores an env: or file:',
      'reference, not the raw key.',
    ].join(' '),
  ].join('\n');
  const result = await runKtxSetupAnthropicModelStep(
    { projectDir: tempDir, inputMode: 'auto', skipLlm: false },
    io.io,
    { prompts, env: {} },
  );
  expect(result.status).toBe('back');
  expect(prompts.select).toHaveBeenCalledWith(
    expect.objectContaining({
      message: expectedPromptMessage,
    }),
  );
  expect(io.stdout()).not.toContain('KTX uses the key');
});
// A failed health check in non-interactive mode ends the step as 'failed', reports the
// provider's message on stderr, and leaks neither completion markers nor the key.
it('does not persist llm completion when the health check fails', async () => {
  const io = makeIo();
  const result = await runKtxSetupAnthropicModelStep(
    {
      projectDir: tempDir,
      inputMode: 'disabled',
      anthropicApiKeyEnv: 'ANTHROPIC_API_KEY', // pragma: allowlist secret
      skipLlm: false,
    },
    io.io,
    {
      env: { ANTHROPIC_API_KEY: 'sk-ant-test' }, // pragma: allowlist secret
      healthCheck: vi.fn(async () => ({ ok: false as const, message: '401 invalid x-api-key [redacted]' })),
    },
  );
  expect(result.status).toBe('failed');
  expect(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')).not.toContain('completed_steps:');
  expect(io.stderr()).toContain('Anthropic model health check failed: 401 invalid x-api-key [redacted]');
  expect(io.stderr()).not.toContain('sk-ant-test');
});
// In interactive mode a failed health check sends the user back to credential selection;
// once a later attempt passes, the configuration and the 'llm' step completion are saved.
it('re-prompts after an interactive health-check failure and saves after retry success', async () => {
  const io = makeIo();
  const prompts = makePromptAdapter({ selectValues: ['env', 'env'] });
  // The first health check fails; the three that follow succeed.
  const healthCheck = vi
    .fn()
    .mockResolvedValueOnce({ ok: false as const, message: 'model not found' })
    .mockResolvedValueOnce({ ok: true as const })
    .mockResolvedValueOnce({ ok: true as const })
    .mockResolvedValueOnce({ ok: true as const });
  const result = await runKtxSetupAnthropicModelStep(
    { projectDir: tempDir, inputMode: 'auto', skipLlm: false },
    io.io,
    {
      prompts,
      env: { ANTHROPIC_API_KEY: 'sk-ant-test' }, // pragma: allowlist secret
      healthCheck,
    },
  );
  expect(result.status).toBe('ready');
  expect(healthCheck).toHaveBeenCalledTimes(4);
  expect(prompts.select).toHaveBeenCalledTimes(3);
  // The retry path must not ask the user to pick a model.
  expect(prompts.autocomplete).not.toHaveBeenCalledWith(
    expect.objectContaining({
      message: expect.stringContaining('Which Anthropic model should KTX use?'),
    }),
  );
  expect(io.stderr()).toContain('Anthropic model health check failed: model not found');
  expect(io.stderr()).toContain('Choose a different credential source or Back.');
  const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8'));
  expect(config.llm.models).toMatchObject(anthropicPreset);
  // Step completion is recorded in the setup state, not in ktx.yaml.
  expect(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')).not.toContain('completed_steps:');
  expect((await readKtxSetupState(tempDir)).completed_steps).toContain('llm');
  expect(io.stderr()).not.toContain('sk-ant-test');
});
// `skipLlm: true` returns 'skipped' and leaves no completion marker in ktx.yaml.
it('leaves setup incomplete when skipped', async () => {
  const result = await runKtxSetupAnthropicModelStep(
    { projectDir: tempDir, inputMode: 'disabled', skipLlm: true },
    makeIo().io,
  );
  expect(result.status).toBe('skipped');
  expect(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')).not.toContain('completed_steps:');
});
// Selecting Back at the credential prompt must leave the configured backend as 'none'.
it('returns back without writing config when Back is selected', async () => {
  const prompts = makePromptAdapter({ credentialChoice: 'back' });
  const result = await runKtxSetupAnthropicModelStep(
    { projectDir: tempDir, inputMode: 'auto', skipLlm: false },
    makeIo().io,
    { prompts, env: {} },
  );
  expect(result.status).toBe('back');
  const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8'));
  expect(config.llm.provider.backend).toBe('none');
});
// The password prompt yields `undefined` (the Escape case named in the title), so the user is
// returned to credential selection and picks 'env' instead. No secret file may be written, and
// ktx.yaml must reference the environment variable.
it('returns from pasted key entry Escape to credential selection and can use env credentials', async () => {
  const prompts = makePromptAdapter({ selectValues: ['paste', 'env'], passwordValues: [undefined] });
  const result = await runKtxSetupAnthropicModelStep(
    { projectDir: tempDir, inputMode: 'auto', skipLlm: false },
    makeIo().io,
    {
      prompts,
      env: { ANTHROPIC_API_KEY: 'sk-ant-env' }, // pragma: allowlist secret
      healthCheck: vi.fn(async () => ({ ok: true as const })),
    },
  );
  expect(result.status).toBe('ready');
  expect(prompts.password).toHaveBeenCalledWith({
    message: 'Anthropic API key\n│ Press Escape to go back.\n│',
  });
  // The abandoned paste attempt must not have created the secret file.
  await expect(readFile(join(tempDir, '.ktx/secrets/anthropic-api-key'), 'utf-8')).rejects.toMatchObject({
    code: 'ENOENT',
  });
  const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8'));
  expect(config.llm.provider).toMatchObject({
    backend: 'anthropic',
    anthropic: { api_key: 'env:ANTHROPIC_API_KEY' }, // pragma: allowlist secret
  });
});
// When ktx.yaml already has an LLM provider and the setup state lists 'llm' as completed,
// running the step without model arguments should report 'ready' without a health check.
it('preserves already completed llm setup when no model args request changes', async () => {
  await mkdir(join(tempDir, '.ktx'), { recursive: true });
  await initKtxProject({ projectDir: tempDir, force: true });
  // YAML nesting is significant: provider settings must sit under `llm.provider`, so the
  // fixture lines use two-space indentation per level.
  await writeFile(
    join(tempDir, 'ktx.yaml'),
    [
      'setup:',
      '  database_connection_ids: []',
      'connections: {}',
      'llm:',
      '  provider:',
      '    backend: anthropic',
      '    anthropic:',
      '      api_key: env:ANTHROPIC_API_KEY', // pragma: allowlist secret
      '  models:',
      '    default: claude-sonnet-4-6',
      'ingest:',
      '  embeddings:',
      '    backend: none',
      '    dimensions: 8',
    ].join('\n'),
    'utf-8',
  );
  await writeKtxSetupState(tempDir, { completed_steps: ['project', 'llm'] });
  const healthCheck = vi.fn(async () => ({ ok: true as const }));
  await expect(
    runKtxSetupAnthropicModelStep({ projectDir: tempDir, inputMode: 'disabled', skipLlm: false }, makeIo().io, {
      env: { ANTHROPIC_API_KEY: 'sk-ant-test' }, // pragma: allowlist secret
      healthCheck,
    }),
  ).resolves.toMatchObject({ status: 'ready' });
  expect(healthCheck).not.toHaveBeenCalled();
});
// Table-driven: for each non-Anthropic backend already configured in ktx.yaml and marked
// complete, the step should report ready without a health check and without mentioning
// Anthropic on stderr.
it.each([
  {
    backend: 'vertex',
    // Lines are spliced under `llm.provider`, hence the four- and six-space indentation.
    providerLines: ['    backend: vertex', '    vertex:', '      project: kaelio-dev', '      location: us-east5'],
    model: 'claude-sonnet-4-6',
  },
  {
    backend: 'gateway',
    providerLines: ['    backend: gateway', '    gateway:', '      api_key: env:AI_GATEWAY_API_KEY'],
    model: 'anthropic/claude-sonnet-4-6',
  },
])('preserves already configured $backend llm setup without asking for Anthropic credentials', async (fixture) => {
  // YAML nesting is significant, so every fixture line carries two spaces per level.
  await writeFile(
    join(tempDir, 'ktx.yaml'),
    [
      'setup:',
      '  database_connection_ids: []',
      'connections: {}',
      'llm:',
      '  provider:',
      ...fixture.providerLines,
      '  models:',
      `    default: ${fixture.model}`,
      'ingest:',
      '  embeddings:',
      '    backend: none',
      '    dimensions: 8',
    ].join('\n'),
    'utf-8',
  );
  await writeKtxSetupState(tempDir, { completed_steps: ['project', 'llm'] });
  const healthCheck = vi.fn(async () => ({ ok: true as const }));
  const io = makeIo();
  await expect(
    runKtxSetupAnthropicModelStep({ projectDir: tempDir, inputMode: 'disabled', skipLlm: false }, io.io, {
      healthCheck,
    }),
  ).resolves.toMatchObject({ status: 'ready' });
  expect(healthCheck).not.toHaveBeenCalled();
  expect(io.stdout()).toContain(`LLM ready: yes (${fixture.model})`);
  expect(io.stderr()).not.toContain('Anthropic');
});
2026-05-10 23:12:26 +02:00
});