ktx/packages/cli/src/setup-embeddings.ts
Andrey Avtomonov a11b9e9757
refactor(release): drop release-policy.json runtime dep and next branch (#180)
* chore: standardize daemon naming on "KTX daemon"

Replace inconsistent names ("KTX Python daemon", "KTX local embeddings
daemon", "KTX managed daemon", "Python daemon") with the single name
"KTX daemon" in CLI output, errors, command descriptions, test
assertions, smoke scripts, docs, AGENTS.md, issue templates, and
codecov flags. The daemon is a portable compute server with endpoints
for SQL analysis, semantic layer, LookML, database introspection, and
embeddings; the previous labels misrepresented it as embeddings-only or
exposed implementation details ("Python", "managed").

The "KTX Python runtime" concept (installed interpreter + packages) is
deliberately left as-is — it is a separate concept from the daemon
process.

* refactor(release): drop release-policy.json runtime dep and next branch

Strips the release-policy.json fallback from release-version.ts so the CLI
reads its version straight from packages/cli/package.json. dev → 0.0.0-private,
installed @kaelio/ktx → the real semver baked into the published package.json.
KtxCliPackageInfo collapses to { name, version, contextPackageName }; /health
no longer depends on version files surviving past a CI run.

Replaces the dual-branch (main + next) semantic-release model with a single-
branch model on main. rcs and stables interleave on the same branch via
{ name: 'main', prerelease: 'rc', channel: 'next' } / ['main']. Drops
@semantic-release/git and @semantic-release/changelog (nothing is committed
back to the repo on any channel) and the workflow's "Prepare next prerelease
branch" step plus the KTX_PRERELEASE_BRANCH plumbing. The git tag plus the
published npm artifact carry the version forward.

Updates docs/release.md, removes the two now-unused devDeps, regenerates
pnpm-lock.yaml. 611/611 @ktx/cli tests, 173/173 script tests, type-check,
biome, knip all clean.

* fix(release): don't throw on non-main branches at config-load time

knip loads .releaserc.cjs on every PR run, where GITHUB_REF_NAME is the
merge ref (e.g. 180/merge). The previous version of releaseBranches threw
immediately when the branch wasn't main, which made knip fail to evaluate
the config and then mis-flag @semantic-release/exec as an unused dep.

semantic-release already refuses to publish when the current branch doesn't
match a configured release branch, so the explicit throw was redundant.
Drop it (and the unused currentBranch helper) and replace the
"rejects releases from non-main" assertion with one that exercises a CI-
shaped GITHUB_REF_NAME and confirms the config loads.
2026-05-20 13:53:14 +02:00

495 lines
17 KiB
TypeScript

import { readFile, writeFile } from 'node:fs/promises';
import { resolveKtxConfigReference } from '@ktx/context/core';
import {
type KtxProjectConfig,
type KtxProjectEmbeddingConfig,
loadKtxProject,
markKtxSetupStateStepComplete,
readKtxSetupState,
serializeKtxProjectConfig,
} from '@ktx/context/project';
import { type KtxEmbeddingConfig, type KtxEmbeddingHealthCheckResult, runKtxEmbeddingHealthCheck } from '@ktx/llm';
import type { KtxCliIo } from './cli-runtime.js';
import { createStaticCliSpinner, type KtxCliSpinner } from './clack.js';
import {
ensureManagedLocalEmbeddingsDaemon,
managedLocalEmbeddingHealthConfig,
managedLocalEmbeddingProjectConfig,
type ManagedLocalEmbeddingsDaemon,
} from './managed-local-embeddings.js';
import type { KtxManagedPythonInstallPolicy } from './managed-python-command.js';
import { withTextInputNavigation } from './prompt-navigation.js';
import { envCredentialReference, writeProjectLocalSecretReference } from './setup-secrets.js';
import {
createKtxSetupPromptAdapter,
type KtxSetupPromptOption,
} from './setup-prompts.js';
export type KtxSetupEmbeddingBackend = 'openai' | 'sentence-transformers';
export interface KtxSetupEmbeddingsArgs {
projectDir: string;
inputMode: 'auto' | 'disabled';
cliVersion: string;
runtimeInstallPolicy: KtxManagedPythonInstallPolicy;
embeddingBackend?: KtxSetupEmbeddingBackend;
embeddingApiKeyEnv?: string;
embeddingApiKeyFile?: string;
forcePrompt?: boolean;
showPromptInstructions?: boolean;
skipEmbeddings: boolean;
}
export type KtxSetupEmbeddingsResult =
| { status: 'ready'; projectDir: string }
| { status: 'skipped'; projectDir: string }
| { status: 'back'; projectDir: string }
| { status: 'missing-input'; projectDir: string }
| { status: 'failed'; projectDir: string };
export interface KtxSetupEmbeddingsPromptAdapter {
select(options: { message: string; options: KtxSetupPromptOption[] }): Promise<string>;
password(options: { message: string }): Promise<string | undefined>;
cancel(message: string): void;
}
export interface KtxSetupEmbeddingsDeps {
env?: NodeJS.ProcessEnv;
prompts?: KtxSetupEmbeddingsPromptAdapter;
healthCheck?: (config: KtxEmbeddingConfig) => Promise<KtxEmbeddingHealthCheckResult>;
ensureLocalEmbeddings?: (options: {
cliVersion: string;
projectDir: string;
installPolicy: KtxManagedPythonInstallPolicy;
io: KtxCliIo;
}) => Promise<ManagedLocalEmbeddingsDaemon>;
spinner?: () => KtxCliSpinner;
}
type BackendChoice = KtxSetupEmbeddingBackend | 'back';
const DEFAULTS: Record<
KtxSetupEmbeddingBackend,
{ model: string; dimensions: number; envName?: string; baseUrl?: string; pathPrefix?: string }
> = {
openai: { model: 'text-embedding-3-small', dimensions: 1536, envName: 'OPENAI_API_KEY' },
'sentence-transformers': {
model: 'all-MiniLM-L6-v2',
dimensions: 384,
baseUrl: 'http://127.0.0.1:8765',
pathPrefix: '',
},
};
const LOCAL_EMBEDDING_BACKEND: KtxSetupEmbeddingBackend = 'sentence-transformers';
const EMBEDDING_OPTION_PROMPT_CONTEXT =
'KTX uses embeddings for semantic search over semantic-layer sources, wiki context, schema metadata, ' +
'and relationship evidence.';
const LOCAL_EMBEDDING_HEALTH_TIMEOUT_MS = 120_000;
const LOCAL_EMBEDDING_STDERR_TAIL_LINES = 40;
function createPromptAdapter(): KtxSetupEmbeddingsPromptAdapter {
return createKtxSetupPromptAdapter({ selectCancelValue: 'back' });
}
async function hasCompletedEmbeddings(projectDir: string, config: KtxProjectConfig): Promise<boolean> {
return (
(await readKtxSetupState(projectDir)).completed_steps.includes('embeddings') &&
config.ingest.embeddings.backend !== 'none' &&
typeof config.ingest.embeddings.model === 'string' &&
config.ingest.embeddings.model.length > 0 &&
config.ingest.embeddings.dimensions > 0
);
}
function buildProjectEmbeddingConfig(input: {
backend: KtxSetupEmbeddingBackend;
model: string;
dimensions: number;
credentialRef?: string;
}): KtxProjectEmbeddingConfig {
if (input.backend === 'openai') {
return {
backend: 'openai',
model: input.model,
dimensions: input.dimensions,
openai: {
...(input.credentialRef ? { api_key: input.credentialRef } : {}),
},
};
}
const defaults = DEFAULTS[input.backend];
return {
backend: input.backend,
model: input.model,
dimensions: input.dimensions,
sentenceTransformers: {
base_url: defaults.baseUrl ?? '',
pathPrefix: defaults.pathPrefix ?? '',
},
};
}
function buildHealthConfig(input: {
backend: KtxSetupEmbeddingBackend;
model: string;
dimensions: number;
credentialValue?: string;
}): KtxEmbeddingConfig {
if (input.backend === 'openai') {
return {
backend: 'openai',
model: input.model,
dimensions: input.dimensions,
openai: {
...(input.credentialValue ? { apiKey: input.credentialValue } : {}),
},
};
}
const defaults = DEFAULTS[input.backend];
return {
backend: input.backend,
model: input.model,
dimensions: input.dimensions,
sentenceTransformers: {
baseURL: defaults.baseUrl ?? '',
pathPrefix: defaults.pathPrefix ?? '',
},
};
}
function embeddingBackendDisplayName(backend: KtxSetupEmbeddingBackend): string {
if (backend === 'openai') {
return 'OpenAI';
}
return 'sentence-transformers';
}
async function persistEmbeddingConfig(projectDir: string, embeddings: KtxProjectEmbeddingConfig): Promise<void> {
const project = await loadKtxProject({ projectDir });
const config = {
...project.config,
ingest: {
...project.config.ingest,
embeddings,
},
scan: {
...project.config.scan,
enrichment: {
...project.config.scan.enrichment,
embeddings,
},
},
};
await writeFile(project.configPath, serializeKtxProjectConfig(config), 'utf-8');
await markKtxSetupStateStepComplete(projectDir, 'embeddings');
}
async function chooseCredentialRef(
backend: Extract<KtxSetupEmbeddingBackend, 'openai'>,
args: KtxSetupEmbeddingsArgs,
io: KtxCliIo,
deps: KtxSetupEmbeddingsDeps,
): Promise<{ status: 'ready'; ref: string; value: string } | { status: 'back' | 'missing-input' }> {
const env = deps.env ?? process.env;
if (args.embeddingApiKeyEnv) {
const ref = envCredentialReference(args.embeddingApiKeyEnv);
const value = resolveKtxConfigReference(ref, env);
if (!value) {
io.stderr.write(`Missing embedding API key: ${args.embeddingApiKeyEnv} is not set.\n`);
return { status: 'missing-input' };
}
return { status: 'ready', ref, value };
}
if (args.embeddingApiKeyFile) {
const ref = `file:${args.embeddingApiKeyFile}`;
let value: string | undefined;
try {
value = resolveKtxConfigReference(ref, env);
} catch {
value = undefined;
}
if (!value) {
io.stderr.write(`Missing embedding API key file: ${args.embeddingApiKeyFile}\n`);
return { status: 'missing-input' };
}
return { status: 'ready', ref, value };
}
if (args.inputMode === 'disabled') {
io.stderr.write('Missing embedding API key: pass --embedding-api-key-env or --embedding-api-key-file.\n');
return { status: 'missing-input' };
}
const defaultEnv = DEFAULTS[backend].envName ?? 'EMBEDDING_API_KEY';
const prompts = deps.prompts ?? createPromptAdapter();
const choice = await prompts.select({
message: `How should KTX find your ${embeddingBackendDisplayName(backend)} embedding API key?`,
options: [
{ value: 'env', label: `Use ${defaultEnv} from the environment` },
{ value: 'paste', label: 'Paste a key and save it as a local secret file' },
{ value: 'back', label: 'Back' },
],
});
if (choice === 'back') {
return { status: 'back' };
}
if (choice === 'paste') {
io.stdout.write(
`${[
`KTX will save the key in .ktx/secrets/${backend}-api-key with local file permissions,`,
'then write a file: reference in ktx.yaml.',
].join(' ')}\n`,
);
const value = await prompts.password({ message: withTextInputNavigation(`${backend} embedding API key`) });
if (value === undefined) {
return { status: 'back' };
}
if (!value.trim()) {
return { status: 'missing-input' };
}
const ref = await writeProjectLocalSecretReference({
projectDir: args.projectDir,
fileName: `${backend}-api-key`,
value,
});
return { status: 'ready', ref, value: value.trim() };
}
const ref = envCredentialReference(defaultEnv);
const value = resolveKtxConfigReference(ref, env);
if (!value) {
io.stderr.write(`Missing embedding API key: ${defaultEnv} is not set.\n`);
return { status: 'missing-input' };
}
return { status: 'ready', ref, value };
}
async function chooseEmbeddingBackend(
args: KtxSetupEmbeddingsArgs,
deps: KtxSetupEmbeddingsDeps,
): Promise<BackendChoice> {
if (args.embeddingBackend) {
return args.embeddingBackend;
}
if (args.inputMode === 'disabled') {
return LOCAL_EMBEDDING_BACKEND;
}
const choice = await (deps.prompts ?? createPromptAdapter()).select({
message: `Which embedding option should KTX use?\n\n${EMBEDDING_OPTION_PROMPT_CONTEXT}`,
options: [
{ value: 'sentence-transformers', label: 'Local sentence-transformers embeddings' },
{ value: 'openai', label: 'OpenAI embeddings', hint: 'recommended' },
{ value: 'back', label: 'Back' },
],
});
if (choice === 'openai' || choice === 'sentence-transformers' || choice === 'back') {
return choice;
}
return 'back';
}
async function readLocalEmbeddingDaemonStderrTail(stderrLog: string | undefined): Promise<string[]> {
if (!stderrLog) {
return [];
}
try {
const lines = (await readFile(stderrLog, 'utf8'))
.split(/\r?\n/)
.map((line) => line.trimEnd())
.filter((line) => line.trim().length > 0);
return lines.slice(-LOCAL_EMBEDDING_STDERR_TAIL_LINES);
} catch {
return [];
}
}
function localEmbeddingSetupMessage(message: string, stderrTail: string[] = []): string {
const lines = [
`Local embedding health check failed: ${message}`,
'Local embeddings use the KTX-managed Python runtime.',
'Prepare the runtime with: ktx admin runtime start --feature local-embeddings',
'Use --yes with setup to install and start the runtime without prompting.',
'The first run may download Python packages and the all-MiniLM-L6-v2 model.',
];
if (stderrTail.length > 0) {
lines.push('Recent KTX daemon stderr:', ...stderrTail);
}
return lines.join('\n');
}
async function promptAfterLocalEmbeddingFailure(
deps: KtxSetupEmbeddingsDeps,
): Promise<'retry' | Extract<KtxSetupEmbeddingBackend, 'openai'> | 'back'> {
const choice = await (deps.prompts ?? createPromptAdapter()).select({
message: 'Local embeddings are not reachable. Start the local KTX daemon, then retry.',
options: [
{ value: 'retry', label: 'Retry' },
{ value: 'openai', label: 'Use OpenAI embeddings' },
{ value: 'back', label: 'Back' },
],
});
if (choice === 'openai' || choice === 'back') {
return choice;
}
return 'retry';
}
function healthCheckStartText(backend: KtxSetupEmbeddingBackend, model: string, dimensions: number): string {
if (backend === LOCAL_EMBEDDING_BACKEND) {
return `Testing local embeddings (${model})`;
}
return `Checking ${backend} embeddings (${model}, ${dimensions} dimensions).`;
}
function startHealthCheckProgress(
spinner: KtxCliSpinner,
message: string,
): { succeed(msg: string): void; fail(msg: string): void } {
spinner.start(message);
return {
succeed(msg: string) {
spinner.stop(msg);
},
fail(msg: string) {
spinner.error(msg);
},
};
}
export async function runKtxSetupEmbeddingsStep(
args: KtxSetupEmbeddingsArgs,
io: KtxCliIo,
deps: KtxSetupEmbeddingsDeps = {},
): Promise<KtxSetupEmbeddingsResult> {
if (args.skipEmbeddings) {
io.stdout.write('│ Embeddings setup skipped.\n');
return { status: 'skipped', projectDir: args.projectDir };
}
const project = await loadKtxProject({ projectDir: args.projectDir });
if (
args.forcePrompt !== true &&
(await hasCompletedEmbeddings(args.projectDir, project.config)) &&
!args.embeddingBackend &&
!args.embeddingApiKeyEnv &&
!args.embeddingApiKeyFile
) {
io.stdout.write(`│ Embeddings ready: yes (${project.config.ingest.embeddings.model})\n`);
return { status: 'ready', projectDir: args.projectDir };
}
const healthCheck =
deps.healthCheck ??
((config: KtxEmbeddingConfig) =>
runKtxEmbeddingHealthCheck(config, { timeoutMs: LOCAL_EMBEDDING_HEALTH_TIMEOUT_MS }));
let selectedBackend: KtxSetupEmbeddingBackend | undefined;
while (true) {
if (!selectedBackend) {
const backend = await chooseEmbeddingBackend(args, deps);
if (backend === 'back') {
return { status: 'back', projectDir: args.projectDir };
}
selectedBackend = backend;
}
const defaults = DEFAULTS[selectedBackend];
const model = defaults.model;
const dimensions = defaults.dimensions;
let credentialRef: string | undefined;
let credentialValue: string | undefined;
if (selectedBackend === 'openai') {
const credential = await chooseCredentialRef(selectedBackend, args, io, deps);
if (credential.status === 'back' && !args.embeddingBackend && args.inputMode !== 'disabled') {
selectedBackend = undefined;
continue;
}
if (credential.status !== 'ready') {
return { status: credential.status, projectDir: args.projectDir };
}
credentialRef = credential.ref;
credentialValue = credential.value;
}
let managedLocalEmbeddings: ManagedLocalEmbeddingsDaemon | undefined;
if (selectedBackend === LOCAL_EMBEDDING_BACKEND) {
const ensureLocalEmbeddings = deps.ensureLocalEmbeddings ?? ensureManagedLocalEmbeddingsDaemon;
try {
managedLocalEmbeddings = await ensureLocalEmbeddings({
cliVersion: args.cliVersion,
projectDir: args.projectDir,
installPolicy: args.runtimeInstallPolicy,
io,
});
} catch (error) {
io.stderr.write(`${error instanceof Error ? error.message : String(error)}\n`);
return { status: 'failed', projectDir: args.projectDir };
}
}
const healthConfig =
selectedBackend === LOCAL_EMBEDDING_BACKEND && managedLocalEmbeddings
? managedLocalEmbeddingHealthConfig({
baseUrl: managedLocalEmbeddings.baseUrl,
model,
dimensions,
})
: buildHealthConfig({
backend: selectedBackend,
model,
dimensions,
credentialValue,
});
const healthSpinner = (deps.spinner ?? (() => createStaticCliSpinner(io)))();
const progress = startHealthCheckProgress(healthSpinner, healthCheckStartText(selectedBackend, model, dimensions));
let health: KtxEmbeddingHealthCheckResult;
try {
health = await healthCheck(healthConfig);
} catch (error) {
progress.fail('Embedding test failed');
throw error;
}
if (health.ok) {
progress.succeed(`Embedding test passed (${model}, ${dimensions} dimensions)`);
await persistEmbeddingConfig(
args.projectDir,
selectedBackend === LOCAL_EMBEDDING_BACKEND
? managedLocalEmbeddingProjectConfig({ model, dimensions })
: buildProjectEmbeddingConfig({
backend: selectedBackend,
model,
dimensions,
credentialRef,
}),
);
io.stdout.write(`│ Embeddings ready: yes (${model}, ${dimensions} dimensions)\n`);
return { status: 'ready', projectDir: args.projectDir };
}
progress.fail('Embedding test failed');
const stderrTail =
selectedBackend === 'sentence-transformers'
? await readLocalEmbeddingDaemonStderrTail(managedLocalEmbeddings?.stderrLog)
: [];
io.stderr.write(
selectedBackend === 'sentence-transformers'
? `${localEmbeddingSetupMessage(health.message, stderrTail)}\n`
: `Embedding health check failed: ${health.message}\n`,
);
if (args.inputMode === 'disabled') {
return { status: 'failed', projectDir: args.projectDir };
}
if (selectedBackend !== 'sentence-transformers' && (args.embeddingApiKeyEnv || args.embeddingApiKeyFile)) {
return { status: 'failed', projectDir: args.projectDir };
}
const nextAction =
selectedBackend === 'sentence-transformers' ? await promptAfterLocalEmbeddingFailure(deps) : 'retry';
if (nextAction === 'back') {
return { status: 'back', projectDir: args.projectDir };
}
if (nextAction === 'openai') {
selectedBackend = nextAction;
}
}
}