mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-10 08:05:14 +02:00
refactor(cli): delete sentinel-based managed-embeddings indirection
This commit is contained in:
parent
e08c2523c9
commit
b79f79be84
10 changed files with 19 additions and 285 deletions
|
|
@ -1,29 +1,6 @@
|
|||
import { describe, expect, it, vi } from 'vitest';
|
||||
import { MANAGED_SENTENCE_TRANSFORMERS_BASE_URL } from '@ktx/context';
|
||||
import { buildDefaultKtxProjectConfig, type KtxLocalProject, type KtxProjectConfig } from '@ktx/context/project';
|
||||
import {
|
||||
loadKtxCliProject,
|
||||
projectNeedsManagedLocalEmbeddings,
|
||||
substituteManagedLocalEmbeddingsUrl,
|
||||
} from './cli-project.js';
|
||||
import type { ManagedLocalEmbeddingsDaemon } from './managed-local-embeddings.js';
|
||||
|
||||
const RESOLVED_BASE_URL = 'http://127.0.0.1:51234';
|
||||
|
||||
/**
 * Test helper that builds a stub CLI io pair.
 *
 * Writes to `io.stdout` are discarded; writes to `io.stderr` are appended to an
 * in-memory string.
 *
 * @returns An object with `io` (the stub streams) and `stderr()`, an accessor
 *   returning everything written to the stub stderr so far.
 */
function makeIo() {
|
||||
let stderr = '';
|
||||
return {
|
||||
io: {
|
||||
stdout: { write: (_chunk: string) => {} },
|
||||
stderr: {
|
||||
write: (chunk: string) => {
|
||||
stderr += chunk;
|
||||
},
|
||||
},
|
||||
},
|
||||
stderr: () => stderr,
|
||||
};
|
||||
}
|
||||
import { loadKtxCliProject } from './cli-project.js';
|
||||
|
||||
function projectWithConfig(config: KtxProjectConfig): KtxLocalProject {
|
||||
return {
|
||||
|
|
@ -36,147 +13,14 @@ function projectWithConfig(config: KtxProjectConfig): KtxLocalProject {
|
|||
};
|
||||
}
|
||||
|
||||
/**
 * Test helper: returns a copy of `config` whose `ingest.embeddings` is replaced
 * by a sentence-transformers config (`all-MiniLM-L6-v2`, 384 dimensions) whose
 * `base_url` is `MANAGED_SENTENCE_TRANSFORMERS_BASE_URL`.
 *
 * All other top-level and `ingest` fields are carried over via spread.
 *
 * @param config Base project config to copy from.
 * @returns A new config object; `config` itself is not assigned to.
 */
function withManagedIngestEmbedding(config: KtxProjectConfig): KtxProjectConfig {
|
||||
return {
|
||||
...config,
|
||||
ingest: {
|
||||
...config.ingest,
|
||||
embeddings: {
|
||||
backend: 'sentence-transformers',
|
||||
model: 'all-MiniLM-L6-v2',
|
||||
dimensions: 384,
|
||||
sentenceTransformers: { base_url: MANAGED_SENTENCE_TRANSFORMERS_BASE_URL, pathPrefix: '' },
|
||||
},
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Test helper: returns a copy of `config` whose `scan.enrichment.embeddings` is
 * replaced by a sentence-transformers config (`all-MiniLM-L6-v2`, 384
 * dimensions) whose `base_url` is `MANAGED_SENTENCE_TRANSFORMERS_BASE_URL`.
 *
 * Other top-level, `scan`, and `scan.enrichment` fields are carried over via
 * spread.
 *
 * @param config Base project config to copy from.
 * @returns A new config object; `config` itself is not assigned to.
 */
function withManagedScanEnrichmentEmbedding(config: KtxProjectConfig): KtxProjectConfig {
|
||||
return {
|
||||
...config,
|
||||
scan: {
|
||||
...config.scan,
|
||||
enrichment: {
|
||||
...config.scan.enrichment,
|
||||
embeddings: {
|
||||
backend: 'sentence-transformers',
|
||||
model: 'all-MiniLM-L6-v2',
|
||||
dimensions: 384,
|
||||
sentenceTransformers: { base_url: MANAGED_SENTENCE_TRANSFORMERS_BASE_URL, pathPrefix: '' },
|
||||
},
|
||||
},
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
// Canned daemon descriptor for tests: its baseUrl is RESOLVED_BASE_URL and the
// log fields are fixed paths under /work/proj/.ktx/runtime.
const fakeDaemon: ManagedLocalEmbeddingsDaemon = {
|
||||
baseUrl: RESOLVED_BASE_URL,
|
||||
stdoutLog: '/work/proj/.ktx/runtime/daemon.stdout.log',
|
||||
stderrLog: '/work/proj/.ktx/runtime/daemon.stderr.log',
|
||||
};
|
||||
|
||||
// Suite for projectNeedsManagedLocalEmbeddings: expects false for the default
// project config, and true when either ingest.embeddings or
// scan.enrichment.embeddings carries the managed sentinel base_url.
describe('projectNeedsManagedLocalEmbeddings', () => {
|
||||
it('returns false when neither ingest nor scan embeddings reference the managed sentinel', () => {
|
||||
expect(projectNeedsManagedLocalEmbeddings(buildDefaultKtxProjectConfig())).toBe(false);
|
||||
});
|
||||
|
||||
it('returns true when ingest.embeddings uses the managed sentinel', () => {
|
||||
expect(projectNeedsManagedLocalEmbeddings(withManagedIngestEmbedding(buildDefaultKtxProjectConfig()))).toBe(true);
|
||||
});
|
||||
|
||||
it('returns true when scan.enrichment.embeddings uses the managed sentinel', () => {
|
||||
expect(
|
||||
projectNeedsManagedLocalEmbeddings(withManagedScanEnrichmentEmbedding(buildDefaultKtxProjectConfig())),
|
||||
).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
// Suite for substituteManagedLocalEmbeddingsUrl. Covers three cases:
//  1. both embedding sections carry the sentinel -> both base_urls become RESOLVED_BASE_URL;
//  2. default config (no sentinel) -> both embedding sections compare equal to the input's;
//  3. an explicit non-sentinel base_url ('http://localhost:9999') is left as is.
describe('substituteManagedLocalEmbeddingsUrl', () => {
|
||||
it('rewrites the managed sentinel in both ingest.embeddings and scan.enrichment.embeddings', () => {
|
||||
const config = withManagedScanEnrichmentEmbedding(withManagedIngestEmbedding(buildDefaultKtxProjectConfig()));
|
||||
const resolved = substituteManagedLocalEmbeddingsUrl(config, RESOLVED_BASE_URL);
|
||||
expect(resolved.ingest.embeddings.sentenceTransformers?.base_url).toBe(RESOLVED_BASE_URL);
|
||||
expect(resolved.scan.enrichment.embeddings?.sentenceTransformers?.base_url).toBe(RESOLVED_BASE_URL);
|
||||
});
|
||||
|
||||
it('returns the input unchanged when no sentinel is present', () => {
|
||||
const config = buildDefaultKtxProjectConfig();
|
||||
const resolved = substituteManagedLocalEmbeddingsUrl(config, RESOLVED_BASE_URL);
|
||||
expect(resolved.ingest.embeddings).toEqual(config.ingest.embeddings);
|
||||
expect(resolved.scan.enrichment.embeddings).toEqual(config.scan.enrichment.embeddings);
|
||||
});
|
||||
|
||||
it('does not touch non-sentinel sentence-transformers URLs', () => {
|
||||
const config: KtxProjectConfig = {
|
||||
...buildDefaultKtxProjectConfig(),
|
||||
ingest: {
|
||||
...buildDefaultKtxProjectConfig().ingest,
|
||||
embeddings: {
|
||||
backend: 'sentence-transformers',
|
||||
model: 'all-MiniLM-L6-v2',
|
||||
dimensions: 384,
|
||||
sentenceTransformers: { base_url: 'http://localhost:9999', pathPrefix: '' },
|
||||
},
|
||||
},
|
||||
};
|
||||
const resolved = substituteManagedLocalEmbeddingsUrl(config, RESOLVED_BASE_URL);
|
||||
expect(resolved.ingest.embeddings.sentenceTransformers?.base_url).toBe('http://localhost:9999');
|
||||
});
|
||||
});
|
||||
|
||||
describe('loadKtxCliProject', () => {
|
||||
it('returns the project unchanged and does not start the daemon when no sentinel is present', async () => {
|
||||
const io = makeIo();
|
||||
it('delegates to loadKtxProject and returns the project unchanged', async () => {
|
||||
const project = projectWithConfig(buildDefaultKtxProjectConfig());
|
||||
const loadProject = vi.fn(async () => project);
|
||||
const ensureLocalEmbeddings = vi.fn(async () => fakeDaemon);
|
||||
|
||||
const result = await loadKtxCliProject(
|
||||
{ projectDir: '/work/proj', cliVersion: '0.2.0', installPolicy: 'never', io: io.io },
|
||||
{ loadProject, ensureLocalEmbeddings },
|
||||
);
|
||||
const result = await loadKtxCliProject({ projectDir: '/work/proj' }, { loadProject });
|
||||
|
||||
expect(result).toBe(project);
|
||||
expect(ensureLocalEmbeddings).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('starts the daemon and substitutes the resolved URL when ingest.embeddings uses the sentinel', async () => {
|
||||
const io = makeIo();
|
||||
const project = projectWithConfig(withManagedIngestEmbedding(buildDefaultKtxProjectConfig()));
|
||||
const loadProject = vi.fn(async () => project);
|
||||
const ensureLocalEmbeddings = vi.fn(async () => fakeDaemon);
|
||||
|
||||
const result = await loadKtxCliProject(
|
||||
{ projectDir: '/work/proj', cliVersion: '0.2.0', installPolicy: 'never', io: io.io },
|
||||
{ loadProject, ensureLocalEmbeddings },
|
||||
);
|
||||
|
||||
expect(ensureLocalEmbeddings).toHaveBeenCalledWith({
|
||||
cliVersion: '0.2.0',
|
||||
projectDir: '/work/proj',
|
||||
installPolicy: 'never',
|
||||
io: io.io,
|
||||
});
|
||||
expect(result.config.ingest.embeddings.sentenceTransformers?.base_url).toBe(RESOLVED_BASE_URL);
|
||||
});
|
||||
|
||||
it('does not mutate process.env', async () => {
|
||||
const io = makeIo();
|
||||
const before = process.env.KTX_MANAGED_SENTENCE_TRANSFORMERS_BASE_URL;
|
||||
delete process.env.KTX_MANAGED_SENTENCE_TRANSFORMERS_BASE_URL;
|
||||
try {
|
||||
const project = projectWithConfig(withManagedIngestEmbedding(buildDefaultKtxProjectConfig()));
|
||||
await loadKtxCliProject(
|
||||
{ projectDir: '/work/proj', cliVersion: '0.2.0', installPolicy: 'never', io: io.io },
|
||||
{ loadProject: vi.fn(async () => project), ensureLocalEmbeddings: vi.fn(async () => fakeDaemon) },
|
||||
);
|
||||
expect(process.env.KTX_MANAGED_SENTENCE_TRANSFORMERS_BASE_URL).toBeUndefined();
|
||||
} finally {
|
||||
if (before === undefined) {
|
||||
delete process.env.KTX_MANAGED_SENTENCE_TRANSFORMERS_BASE_URL;
|
||||
} else {
|
||||
process.env.KTX_MANAGED_SENTENCE_TRANSFORMERS_BASE_URL = before;
|
||||
}
|
||||
}
|
||||
expect(loadProject).toHaveBeenCalledWith({ projectDir: '/work/proj' });
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -1,91 +1,20 @@
|
|||
import { MANAGED_SENTENCE_TRANSFORMERS_BASE_URL } from '@ktx/context';
|
||||
import { loadKtxProject, type KtxLocalProject } from '@ktx/context/project';
|
||||
import type { KtxProjectConfig, KtxProjectEmbeddingConfig } from '@ktx/context/project';
|
||||
import type { KtxCliIo } from './cli-runtime.js';
|
||||
import {
|
||||
ensureManagedLocalEmbeddingsDaemon,
|
||||
type ManagedLocalEmbeddingsDaemon,
|
||||
} from './managed-local-embeddings.js';
|
||||
import type { KtxManagedPythonInstallPolicy } from './managed-python-command.js';
|
||||
|
||||
export interface LoadKtxCliProjectOptions {
|
||||
projectDir: string;
|
||||
cliVersion: string;
|
||||
installPolicy: KtxManagedPythonInstallPolicy;
|
||||
io: KtxCliIo;
|
||||
}
|
||||
|
||||
export interface LoadKtxCliProjectDeps {
|
||||
loadProject?: typeof loadKtxProject;
|
||||
ensureLocalEmbeddings?: (
|
||||
options: Parameters<typeof ensureManagedLocalEmbeddingsDaemon>[0],
|
||||
) => Promise<ManagedLocalEmbeddingsDaemon>;
|
||||
}
|
||||
|
||||
/**
|
||||
* Thin wrapper around `loadKtxProject`. Kept as a single entrypoint so the CLI can grow shared
|
||||
* pre-load behavior later (telemetry, project lock, etc.). Today it does no extra work.
|
||||
*/
|
||||
export async function loadKtxCliProject(
|
||||
options: LoadKtxCliProjectOptions,
|
||||
deps: LoadKtxCliProjectDeps = {},
|
||||
): Promise<KtxLocalProject> {
|
||||
const loadProject = deps.loadProject ?? loadKtxProject;
|
||||
const ensureLocalEmbeddings = deps.ensureLocalEmbeddings ?? ensureManagedLocalEmbeddingsDaemon;
|
||||
|
||||
const project = await loadProject({ projectDir: options.projectDir });
|
||||
if (!projectNeedsManagedLocalEmbeddings(project.config)) {
|
||||
return project;
|
||||
}
|
||||
|
||||
const daemon = await ensureLocalEmbeddings({
|
||||
cliVersion: options.cliVersion,
|
||||
projectDir: options.projectDir,
|
||||
installPolicy: options.installPolicy,
|
||||
io: options.io,
|
||||
});
|
||||
|
||||
return {
|
||||
...project,
|
||||
config: substituteManagedLocalEmbeddingsUrl(project.config, daemon.baseUrl),
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Reports whether a project config references the managed local-embeddings
 * sentinel in either of its two embedding sections.
 *
 * @param config Project config to inspect.
 * @returns `true` when `config.ingest.embeddings` or
 *   `config.scan.enrichment.embeddings` satisfies `embeddingUsesManagedSentinel`;
 *   `false` otherwise.
 */
export function projectNeedsManagedLocalEmbeddings(config: KtxProjectConfig): boolean {
|
||||
return (
|
||||
embeddingUsesManagedSentinel(config.ingest.embeddings) ||
|
||||
embeddingUsesManagedSentinel(config.scan.enrichment.embeddings)
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Produces a config in which both embedding sections (`ingest.embeddings` and
 * `scan.enrichment.embeddings`) have been passed through
 * `rewriteManagedEmbeddingConfig` with the given `baseUrl`.
 *
 * The result is assembled with object spreads, so a new `config`, `ingest`,
 * `scan`, and `scan.enrichment` object is returned on every call, whether or
 * not either section was actually rewritten.
 *
 * @param config Project config to derive from.
 * @param baseUrl URL handed to `rewriteManagedEmbeddingConfig` for each section.
 * @returns The derived project config.
 */
export function substituteManagedLocalEmbeddingsUrl(
|
||||
config: KtxProjectConfig,
|
||||
baseUrl: string,
|
||||
): KtxProjectConfig {
|
||||
const ingestEmbeddings = rewriteManagedEmbeddingConfig(config.ingest.embeddings, baseUrl);
|
||||
const scanEnrichmentEmbeddings = rewriteManagedEmbeddingConfig(config.scan.enrichment.embeddings, baseUrl);
|
||||
return {
|
||||
...config,
|
||||
ingest: { ...config.ingest, embeddings: ingestEmbeddings },
|
||||
scan: {
|
||||
...config.scan,
|
||||
enrichment: { ...config.scan.enrichment, embeddings: scanEnrichmentEmbeddings },
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Tests whether an embedding config's `sentenceTransformers.base_url` is
 * strictly equal to `MANAGED_SENTENCE_TRANSFORMERS_BASE_URL`.
 *
 * Optional chaining makes the left-hand side `undefined` when `embedding` or
 * its `sentenceTransformers` field is absent.
 *
 * @param embedding Embedding config to inspect; may be `undefined`.
 * @returns Result of the strict-equality comparison.
 */
function embeddingUsesManagedSentinel(embedding: KtxProjectEmbeddingConfig | undefined): boolean {
|
||||
return embedding?.sentenceTransformers?.base_url === MANAGED_SENTENCE_TRANSFORMERS_BASE_URL;
|
||||
}
|
||||
|
||||
function rewriteManagedEmbeddingConfig<T extends KtxProjectEmbeddingConfig | undefined>(
|
||||
embedding: T,
|
||||
baseUrl: string,
|
||||
): T {
|
||||
if (!embedding || !embeddingUsesManagedSentinel(embedding)) {
|
||||
return embedding;
|
||||
}
|
||||
return {
|
||||
...embedding,
|
||||
sentenceTransformers: {
|
||||
...embedding.sentenceTransformers,
|
||||
base_url: baseUrl,
|
||||
},
|
||||
} as T;
|
||||
return (deps.loadProject ?? loadKtxProject)({ projectDir: options.projectDir });
|
||||
}
|
||||
|
|
|
|||
|
|
@ -54,7 +54,6 @@ export type {
|
|||
export {
|
||||
ensureManagedLocalEmbeddingsDaemon,
|
||||
managedLocalEmbeddingHealthConfig,
|
||||
managedLocalEmbeddingProjectConfig,
|
||||
type ManagedLocalEmbeddingsDaemon,
|
||||
type ManagedLocalEmbeddingsOptions,
|
||||
} from './managed-local-embeddings.js';
|
||||
|
|
|
|||
|
|
@ -1,9 +1,7 @@
|
|||
import { describe, expect, it, vi } from 'vitest';
|
||||
import { MANAGED_SENTENCE_TRANSFORMERS_BASE_URL } from '@ktx/context';
|
||||
import {
|
||||
ensureManagedLocalEmbeddingsDaemon,
|
||||
managedLocalEmbeddingHealthConfig,
|
||||
managedLocalEmbeddingProjectConfig,
|
||||
tryUseManagedLocalEmbeddingsDaemon,
|
||||
} from './managed-local-embeddings.js';
|
||||
import type { ManagedPythonCommandRuntime } from './managed-python-command.js';
|
||||
|
|
@ -96,25 +94,6 @@ function daemonResult(status: 'started' | 'reused' = 'reused'): ManagedPythonDae
|
|||
};
|
||||
}
|
||||
|
||||
describe('managedLocalEmbeddingProjectConfig', () => {
|
||||
it('uses a stable managed runtime marker instead of a random daemon port', () => {
|
||||
expect(
|
||||
managedLocalEmbeddingProjectConfig({
|
||||
model: 'all-MiniLM-L6-v2',
|
||||
dimensions: 384,
|
||||
}),
|
||||
).toEqual({
|
||||
backend: 'sentence-transformers',
|
||||
model: 'all-MiniLM-L6-v2',
|
||||
dimensions: 384,
|
||||
sentenceTransformers: {
|
||||
base_url: MANAGED_SENTENCE_TRANSFORMERS_BASE_URL,
|
||||
pathPrefix: '',
|
||||
},
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe('managedLocalEmbeddingHealthConfig', () => {
|
||||
it('uses the active KTX daemon URL for the immediate health check', () => {
|
||||
expect(
|
||||
|
|
|
|||
|
|
@ -1,5 +1,3 @@
|
|||
import { MANAGED_SENTENCE_TRANSFORMERS_BASE_URL } from '@ktx/context';
|
||||
import type { KtxProjectEmbeddingConfig } from '@ktx/context/project';
|
||||
import type { KtxEmbeddingConfig } from '@ktx/llm';
|
||||
import type { KtxCliIo } from './cli-runtime.js';
|
||||
import {
|
||||
|
|
@ -39,21 +37,6 @@ export interface ManagedLocalEmbeddingsOptions {
|
|||
}) => Promise<ManagedPythonDaemonStartResult>;
|
||||
}
|
||||
|
||||
/**
 * Builds the project embedding config for the managed local backend.
 *
 * The result always uses backend `'sentence-transformers'`, copies `model` and
 * `dimensions` from `input`, and sets `sentenceTransformers.base_url` to
 * `MANAGED_SENTENCE_TRANSFORMERS_BASE_URL` with an empty `pathPrefix`.
 *
 * @param input Model name and embedding dimensionality to record.
 * @returns The assembled `KtxProjectEmbeddingConfig`.
 */
export function managedLocalEmbeddingProjectConfig(input: {
|
||||
model: string;
|
||||
dimensions: number;
|
||||
}): KtxProjectEmbeddingConfig {
|
||||
return {
|
||||
backend: 'sentence-transformers',
|
||||
model: input.model,
|
||||
dimensions: input.dimensions,
|
||||
sentenceTransformers: {
|
||||
base_url: MANAGED_SENTENCE_TRANSFORMERS_BASE_URL,
|
||||
pathPrefix: '',
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
export function managedLocalEmbeddingHealthConfig(input: {
|
||||
baseUrl: string;
|
||||
model: string;
|
||||
|
|
|
|||
|
|
@ -1,4 +1,3 @@
|
|||
import { MANAGED_SENTENCE_TRANSFORMERS_BASE_URL } from '@ktx/context';
|
||||
import { buildDefaultKtxProjectConfig, type KtxProjectConfig } from '@ktx/context/project';
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import {
|
||||
|
|
@ -51,7 +50,7 @@ describe('runtime requirement detection', () => {
|
|||
model: 'all-MiniLM-L6-v2',
|
||||
dimensions: 384,
|
||||
sentenceTransformers: {
|
||||
base_url: MANAGED_SENTENCE_TRANSFORMERS_BASE_URL,
|
||||
base_url: '',
|
||||
},
|
||||
},
|
||||
},
|
||||
|
|
|
|||
|
|
@ -1,4 +1,3 @@
|
|||
import { MANAGED_SENTENCE_TRANSFORMERS_BASE_URL } from '@ktx/context';
|
||||
import type {
|
||||
KtxProjectConfig,
|
||||
KtxProjectConnectionConfig,
|
||||
|
|
@ -63,7 +62,7 @@ function requiresManagedLocalEmbeddings(embeddings: KtxProjectEmbeddingConfig):
|
|||
return false;
|
||||
}
|
||||
const baseUrl = embeddings.sentenceTransformers?.base_url;
|
||||
return baseUrl === undefined || baseUrl === '' || baseUrl === MANAGED_SENTENCE_TRANSFORMERS_BASE_URL;
|
||||
return baseUrl === undefined || baseUrl === '';
|
||||
}
|
||||
|
||||
function uniqueRequirements(requirements: KtxRuntimeRequirement[]): KtxRuntimeRequirements {
|
||||
|
|
|
|||
|
|
@ -176,8 +176,8 @@ describe('setup embeddings step', () => {
|
|||
backend: 'sentence-transformers',
|
||||
model: 'all-MiniLM-L6-v2',
|
||||
dimensions: 384,
|
||||
sentenceTransformers: { base_url: 'managed:local-embeddings', pathPrefix: '' },
|
||||
});
|
||||
expect(config.ingest.embeddings.sentenceTransformers).toBeUndefined();
|
||||
expect(config.scan.enrichment.embeddings).toMatchObject(config.ingest.embeddings);
|
||||
expect(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')).not.toContain('completed_steps:');
|
||||
expect((await readKtxSetupState(tempDir)).completed_steps).toContain('embeddings');
|
||||
|
|
@ -275,8 +275,8 @@ describe('setup embeddings step', () => {
|
|||
backend: 'sentence-transformers',
|
||||
model: 'all-MiniLM-L6-v2',
|
||||
dimensions: 384,
|
||||
sentenceTransformers: { base_url: 'managed:local-embeddings', pathPrefix: '' },
|
||||
});
|
||||
expect(config.ingest.embeddings.sentenceTransformers).toBeUndefined();
|
||||
expect(config.scan.enrichment.embeddings).toMatchObject(config.ingest.embeddings);
|
||||
expect(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')).not.toContain('completed_steps:');
|
||||
expect((await readKtxSetupState(tempDir)).completed_steps).toContain('embeddings');
|
||||
|
|
|
|||
|
|
@ -14,7 +14,6 @@ import { createStaticCliSpinner, type KtxCliSpinner } from './clack.js';
|
|||
import {
|
||||
ensureManagedLocalEmbeddingsDaemon,
|
||||
managedLocalEmbeddingHealthConfig,
|
||||
managedLocalEmbeddingProjectConfig,
|
||||
type ManagedLocalEmbeddingsDaemon,
|
||||
} from './managed-local-embeddings.js';
|
||||
import type { KtxManagedPythonInstallPolicy } from './managed-python-command.js';
|
||||
|
|
@ -455,7 +454,11 @@ export async function runKtxSetupEmbeddingsStep(
|
|||
await persistEmbeddingConfig(
|
||||
args.projectDir,
|
||||
selectedBackend === LOCAL_EMBEDDING_BACKEND
|
||||
? managedLocalEmbeddingProjectConfig({ model, dimensions })
|
||||
? {
|
||||
backend: 'sentence-transformers' as const,
|
||||
model,
|
||||
dimensions,
|
||||
}
|
||||
: buildProjectEmbeddingConfig({
|
||||
backend: selectedBackend,
|
||||
model,
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
import { mkdtemp, rm } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { MANAGED_SENTENCE_TRANSFORMERS_BASE_URL } from '@ktx/context';
|
||||
import { buildDefaultKtxProjectConfig, readKtxSetupState, type KtxProjectConfig } from '@ktx/context/project';
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
||||
import { runKtxSetupRuntimeStep } from './setup-runtime.js';
|
||||
|
|
@ -113,7 +112,7 @@ describe('runKtxSetupRuntimeStep', () => {
|
|||
backend: 'sentence-transformers',
|
||||
model: 'all-MiniLM-L6-v2',
|
||||
dimensions: 384,
|
||||
sentenceTransformers: { base_url: MANAGED_SENTENCE_TRANSFORMERS_BASE_URL },
|
||||
sentenceTransformers: { base_url: '' },
|
||||
},
|
||||
},
|
||||
};
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue