ktx/packages/cli/test/managed-local-embeddings.test.ts
Andrey Avtomonov 56985b7e09
test: split cli tests from source tree (#216)
* feat(cli): define full warehouse dialect contract

* test(cli): keep dialect edge tests focused

* fix(cli): stabilize dialect contract foundation

* refactor(connectors): own read-only query preparation

* refactor(connectors): resolve dialects through registry

* refactor(connectors): keep concrete dialect classes internal

* chore(workspace): enforce dialect import boundary

* refactor(cli): resolve relationship dialect at scan boundary

* refactor(cli): use dialect display parsing for entity details

* refactor(cli): use dialect display parsing for warehouse catalog

* refactor(cli): use dialect SQL in relationship workflows

* test(cli): verify solid dialect scan workflow closure

* test: split cli tests from source tree

* refactor(cli): standardize BigQuery scope listing

* feat(sqlite): implement connector scope listing

* test(connectors): cover required table listing

* feat(cli): add warehouse driver registry

* refactor(setup): route scope discovery through driver registry

* refactor(cli): route local query execution through driver registry

* refactor(historic-sql): route dialect support through driver registry

* refactor(cli): test warehouse connections through driver registry

* fix(cli): close driver registry type export gaps

* Improve setup daemon diagnostics

* refactor(setup): centralize rail-prefixed diagnostics + query-history fallback

Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput
into clack.ts so the setup wizard, managed daemons, and embedding/agent steps
share one rail-formatted writer. setup-databases.ts also adds a
"disable query history and retry" option when the schema-context build fails
and query history is the likely culprit, surfaced via a new
failed-query-history-unavailable status.

* fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match

The setup picker's KtxTableListEntry was a 2-level { schema, name }, so
qualifiedTableId always wrote db.name into enabled_tables. When BigQuery,
Snowflake, or SQL Server later ran fast ingest, their introspect step filtered
the scope set with scopedTableNames(scope, { catalog: projectId|database, db })
— catalog was non-null on the introspect side but null in the scope refs, so
every entry was rejected, the live-database adapter staged zero table files,
and detect() failed with 'Adapter "live-database" did not recognize fetched
source output'.

Align the picker boundary with the canonical 3-level KtxTableRef:

- Add catalog: string | null to KtxTableListEntry.
- BigQuery/Snowflake/SQL Server listTables populate catalog from the
  resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null.
- qualifiedTableId emits catalog.schema.name when catalog is non-null
  (resolveEnabledTables already accepts the 3-part shape) and
  schemasFromEnabledTables now goes through parseDottedTableEntry so it
  recovers the schema correctly from both 2-part and 3-part entries.
- Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker
  reuse.

Update listTables expectations in all seven connector tests and the setup /
picker test fixtures. Add a picker regression test that covers the
catalog-bearing round-trip (save + refine).

* fix(cli): allow debug telemetry under opt-out env
2026-05-26 08:49:05 +02:00

254 lines
7.4 KiB
TypeScript

import { describe, expect, it, vi } from 'vitest';
import {
ensureManagedLocalEmbeddingsDaemon,
managedLocalEmbeddingHealthConfig,
tryUseManagedLocalEmbeddingsDaemon,
} from '../src/managed-local-embeddings.js';
import type { ManagedPythonCommandRuntime } from '../src/managed-python-command.js';
import type { ManagedPythonDaemonStartResult } from '../src/managed-python-daemon.js';
import type { ManagedPythonDaemonLayout } from '../src/managed-python-runtime.js';
function makeIo() {
let stdout = '';
let stderr = '';
return {
io: {
stdout: {
write: (chunk: string) => {
stdout += chunk;
},
},
stderr: {
write: (chunk: string) => {
stderr += chunk;
},
},
},
stdout: () => stdout,
stderr: () => stderr,
};
}
function runtime(): ManagedPythonCommandRuntime {
return {
layout: {
cliVersion: '0.2.0',
runtimeRoot: '/runtime',
versionDir: '/runtime/0.2.0',
venvDir: '/runtime/0.2.0/.venv',
manifestPath: '/runtime/0.2.0/manifest.json',
installLogPath: '/runtime/0.2.0/install.log',
assetDir: '/assets/python',
assetManifestPath: '/assets/python/manifest.json',
pythonPath: '/runtime/0.2.0/.venv/bin/python',
daemonPath: '/runtime/0.2.0/.venv/bin/ktx-daemon',
},
manifest: {
schemaVersion: 1,
cliVersion: '0.2.0',
installedAt: '2026-05-11T00:00:00.000Z',
asset: {
schemaVersion: 1,
distributionName: 'kaelio-ktx',
normalizedName: 'kaelio_ktx',
version: '0.2.0',
wheel: {
file: 'kaelio_ktx-0.2.0-py3-none-any.whl',
sha256: 'a'.repeat(64),
bytes: 123,
},
},
features: ['core', 'local-embeddings'],
python: {
executable: '/runtime/0.2.0/.venv/bin/python',
daemonExecutable: '/runtime/0.2.0/.venv/bin/ktx-daemon',
},
installLog: '/runtime/0.2.0/install.log',
},
};
}
function daemonResult(status: 'started' | 'reused' = 'reused'): ManagedPythonDaemonStartResult {
return {
status,
layout: {
...runtime().layout,
projectDir: '/work/proj',
daemonStateDir: '/work/proj/.ktx/runtime',
daemonStatePath: '/work/proj/.ktx/runtime/daemon.json',
daemonStdoutPath: '/work/proj/.ktx/runtime/daemon.stdout.log',
daemonStderrPath: '/work/proj/.ktx/runtime/daemon.stderr.log',
},
baseUrl: 'http://127.0.0.1:61234',
state: {
schemaVersion: 1,
pid: 12345,
host: '127.0.0.1',
port: 61234,
version: '0.2.0',
features: ['core', 'local-embeddings'],
startedAt: '2026-05-11T00:00:00.000Z',
stdoutLog: '/work/proj/.ktx/runtime/daemon.stdout.log',
stderrLog: '/work/proj/.ktx/runtime/daemon.stderr.log',
},
};
}
describe('managedLocalEmbeddingHealthConfig', () => {
it('uses the active KTX daemon URL for the immediate health check', () => {
expect(
managedLocalEmbeddingHealthConfig({
baseUrl: 'http://127.0.0.1:61234',
model: 'all-MiniLM-L6-v2',
dimensions: 384,
}),
).toEqual({
backend: 'sentence-transformers',
model: 'all-MiniLM-L6-v2',
dimensions: 384,
sentenceTransformers: { baseURL: 'http://127.0.0.1:61234', pathPrefix: '' },
});
});
});
describe('ensureManagedLocalEmbeddingsDaemon', () => {
it('ensures the local-embeddings feature and starts the KTX daemon', async () => {
const io = makeIo();
const ensureRuntime = vi.fn(async () => runtime());
const startDaemon = vi.fn(async () => daemonResult('started'));
await expect(
ensureManagedLocalEmbeddingsDaemon({
cliVersion: '0.2.0',
projectDir: '/work/proj',
installPolicy: 'auto',
io: io.io,
ensureRuntime,
startDaemon,
}),
).resolves.toEqual({
baseUrl: 'http://127.0.0.1:61234',
stdoutLog: '/work/proj/.ktx/runtime/daemon.stdout.log',
stderrLog: '/work/proj/.ktx/runtime/daemon.stderr.log',
});
expect(ensureRuntime).toHaveBeenCalledWith({
cliVersion: '0.2.0',
installPolicy: 'auto',
io: io.io,
feature: 'local-embeddings',
});
expect(startDaemon).toHaveBeenCalledWith({
cliVersion: '0.2.0',
projectDir: '/work/proj',
features: ['local-embeddings'],
force: false,
});
expect(io.stderr()).toContain('Started KTX daemon: http://127.0.0.1:61234');
});
it('reuses an already running daemon without reporting a new start', async () => {
const io = makeIo();
await ensureManagedLocalEmbeddingsDaemon({
cliVersion: '0.2.0',
projectDir: '/work/proj',
installPolicy: 'prompt',
io: io.io,
ensureRuntime: vi.fn(async () => runtime()),
startDaemon: vi.fn(async () => daemonResult('reused')),
});
expect(io.stderr()).toContain('Using KTX daemon: http://127.0.0.1:61234');
});
});
describe('tryUseManagedLocalEmbeddingsDaemon', () => {
it('returns the daemon when one is running and healthy', async () => {
const readStatus = vi.fn(async () => ({
kind: 'running' as const,
detail: 'ok',
layout: {} as ManagedPythonDaemonLayout,
state: {
schemaVersion: 1 as const,
pid: 123,
host: '127.0.0.1' as const,
port: 4321,
version: '0.5.0',
features: ['local-embeddings' as const],
startedAt: '2026-05-21T00:00:00Z',
stdoutLog: '/tmp/stdout.log',
stderrLog: '/tmp/stderr.log',
},
baseUrl: 'http://127.0.0.1:4321',
}));
const result = await tryUseManagedLocalEmbeddingsDaemon({
cliVersion: '0.5.0',
projectDir: '/work/proj',
readStatus,
});
expect(result).toEqual({
baseUrl: 'http://127.0.0.1:4321',
stdoutLog: '/tmp/stdout.log',
stderrLog: '/tmp/stderr.log',
});
expect(readStatus).toHaveBeenCalledWith({
cliVersion: '0.5.0',
projectDir: '/work/proj',
});
});
it('returns null when no daemon state exists', async () => {
const readStatus = vi.fn(async () => ({
kind: 'stopped' as const,
detail: 'no state',
layout: {} as ManagedPythonDaemonLayout,
}));
const result = await tryUseManagedLocalEmbeddingsDaemon({
cliVersion: '0.5.0',
projectDir: '/work/proj',
readStatus,
});
expect(result).toBeNull();
});
it('returns null when daemon is stale', async () => {
const readStatus = vi.fn(async () => ({
kind: 'stale' as const,
detail: 'process gone',
layout: {} as ManagedPythonDaemonLayout,
}));
const result = await tryUseManagedLocalEmbeddingsDaemon({
cliVersion: '0.5.0',
projectDir: '/work/proj',
readStatus,
});
expect(result).toBeNull();
});
it('rejects daemons that do not advertise local-embeddings', async () => {
const readStatus = vi.fn(async () => ({
kind: 'running' as const,
detail: 'ok',
layout: {} as ManagedPythonDaemonLayout,
state: {
schemaVersion: 1 as const,
pid: 123,
host: '127.0.0.1' as const,
port: 4321,
version: '0.5.0',
features: ['core' as const],
startedAt: '2026-05-21T00:00:00Z',
stdoutLog: '/tmp/stdout.log',
stderrLog: '/tmp/stderr.log',
},
baseUrl: 'http://127.0.0.1:4321',
}));
const result = await tryUseManagedLocalEmbeddingsDaemon({
cliVersion: '0.5.0',
projectDir: '/work/proj',
readStatus,
});
expect(result).toBeNull();
});
});