ktx/packages/cli/src/sl.test.ts
Andrey Avtomonov 9d92c79988
fix(cli): resolve embedding provider explicitly and surface lane status in sl search (#192)
* feat(cli): add tryUseManagedLocalEmbeddingsDaemon for read-only callers

* feat(cli): add resolveProjectEmbeddingProvider helper

* fix(cli): wire sl search through resolveProjectEmbeddingProvider so semantic lane works

* fix(cli): wire wiki/knowledge search through resolveProjectEmbeddingProvider

* feat(cli): surface embeddings-unavailable status when sl search returns empty

* refactor(cli): route admin reindex through resolveProjectEmbeddingProvider

* refactor: pass embeddingProvider into ingest/scan instead of resolving inside @ktx/context

* refactor(mcp): resolve embedding provider in CLI factory, pass into context ports

* refactor(context): delete MANAGED_SENTENCE_TRANSFORMERS_BASE_URL sentinel

* refactor(cli): delete sentinel-based managed-embeddings indirection

* chore: scrub stale managed-embeddings sentinel references from tests and smoke script

* chore: unexport unused EmbeddingResolutionMode alias

* fix(cli): force pathPrefix="" when targeting the managed embeddings daemon

The managed daemon serves /embeddings/compute directly. The default
pathPrefix in @ktx/llm is /api, so omitting sentenceTransformers from
ktx.yaml produced /api/embeddings/compute -> 404. The resolver now
sets pathPrefix='' explicitly when wiring the managed daemon URL,
matching what the daemon actually exposes.
2026-05-21 02:21:22 +02:00

715 lines
19 KiB
TypeScript

import { mkdtemp, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { stripVTControlCharacters } from 'node:util';
import Database from 'better-sqlite3';
import { initKtxProject } from '@ktx/context/project';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import { runKtxSl } from './sl.js';
const ORDERS_YAML = [
'name: orders',
'table: public.orders',
'grain:',
' - order_id',
'columns:',
' - name: order_id',
' type: string',
'',
].join('\n');
function makeIo() {
let stdout = '';
let stderr = '';
return {
io: {
stdout: {
write: (chunk: string) => {
stdout += chunk;
},
},
stderr: {
write: (chunk: string) => {
stderr += chunk;
},
},
},
stdout: () => stdout,
stderr: () => stderr,
};
}
async function seedSlSource(input: {
projectDir: string;
connectionId?: string;
sourceName?: string;
yaml?: string;
}): Promise<void> {
const project = await initKtxProject({ projectDir: input.projectDir });
await project.fileStore.writeFile(
`semantic-layer/${input.connectionId ?? 'warehouse'}/${input.sourceName ?? 'orders'}.yaml`,
input.yaml ?? ORDERS_YAML,
'ktx',
'ktx@example.com',
'Add semantic-layer source',
);
}
describe('runKtxSl', () => {
let tempDir: string;
beforeEach(async () => {
tempDir = await mkdtemp(join(tmpdir(), 'ktx-cli-sl-'));
});
afterEach(async () => {
await rm(tempDir, { recursive: true, force: true });
});
it('validates, lists, and searches semantic-layer sources', async () => {
const projectDir = join(tempDir, 'project');
await seedSlSource({ projectDir });
const validateIo = makeIo();
await expect(
runKtxSl({ command: 'validate', projectDir, connectionId: 'warehouse', sourceName: 'orders' }, validateIo.io),
).resolves.toBe(0);
expect(validateIo.stdout()).toContain('Valid semantic-layer source: warehouse/orders');
const listIo = makeIo();
await expect(
runKtxSl({ command: 'list', projectDir, connectionId: 'warehouse', cliVersion: '0.0.0-test' }, listIo.io),
).resolves.toBe(0);
expect(listIo.stdout()).toContain('warehouse\torders\tcolumns=1\tmeasures=0\tjoins=0');
const searchIo = makeIo();
await expect(
runKtxSl(
{
command: 'search',
projectDir,
connectionId: 'warehouse',
query: 'order',
json: true,
cliVersion: '0.0.0-test',
},
searchIo.io,
),
).resolves.toBe(0);
expect(JSON.parse(searchIo.stdout())).toMatchObject({
kind: 'list',
data: {
items: [
expect.objectContaining({
connectionId: 'warehouse',
name: 'orders',
score: expect.any(Number),
}),
],
},
meta: { command: 'sl search' },
});
});
it('prints semantic-layer search rank badges in pretty output', async () => {
const projectDir = join(tempDir, 'rank-project');
await seedSlSource({ projectDir });
const searchIo = makeIo();
await expect(
runKtxSl(
{
command: 'search',
projectDir,
connectionId: 'warehouse',
query: 'order',
output: 'pretty',
cliVersion: '0.0.0-test',
},
searchIo.io,
),
).resolves.toBe(0);
const stdout = stripVTControlCharacters(searchIo.stdout());
expect(stdout).toMatch(/#1\s+orders/);
expect(stdout).not.toContain('%');
});
it('prints semantic-layer list and search as public JSON envelopes', async () => {
const projectDir = join(tempDir, 'project');
await seedSlSource({
projectDir,
yaml: [
'name: orders',
'table: public.orders',
'descriptions:',
' user: Paid order facts',
'grain: [order_id]',
'columns:',
' - name: order_id',
' type: string',
'',
].join('\n'),
});
const listIo = makeIo();
await expect(
runKtxSl(
{
command: 'search',
projectDir,
connectionId: 'warehouse',
query: 'paid',
json: true,
cliVersion: '0.0.0-test',
},
listIo.io,
),
).resolves.toBe(0);
expect(JSON.parse(listIo.stdout())).toMatchObject({
kind: 'list',
data: {
items: [
expect.objectContaining({
connectionId: 'warehouse',
name: 'orders',
score: expect.any(Number),
matchReasons: expect.any(Array),
}),
],
},
meta: { command: 'sl search' },
});
});
it('fails validation when a table-backed source declares columns absent from a matching warehouse manifest', async () => {
const projectDir = join(tempDir, 'project');
const project = await initKtxProject({ projectDir });
await project.fileStore.writeFile(
'semantic-layer/postgres-warehouse/_schema/orbit_analytics.yaml',
`tables:
int_active_contract_arr:
table: orbit_analytics.int_active_contract_arr
columns:
- { name: contract_id, type: string }
- { name: contract_arr_cents, type: number }
`,
'ktx',
'ktx@example.com',
'Add warehouse manifest',
);
await project.fileStore.writeFile(
'semantic-layer/dbt-main/int_active_contract_arr.yaml',
`name: int_active_contract_arr
table: orbit_analytics.int_active_contract_arr
grain: [contract_id]
columns:
- { name: contract_id, type: string }
- { name: arr_cents, type: number }
measures:
- { name: arr, expr: sum(arr_cents) }
joins: []
`,
'ktx',
'ktx@example.com',
'Add invalid dbt source',
);
const validateIo = makeIo();
await expect(
runKtxSl(
{
command: 'validate',
projectDir,
connectionId: 'dbt-main',
sourceName: 'int_active_contract_arr',
},
validateIo.io,
),
).resolves.toBe(1);
expect(validateIo.stderr()).toContain('arr_cents');
expect(validateIo.stderr()).toContain('absent from physical table');
});
it('runs sl query and prints SQL output', async () => {
const projectDir = join(tempDir, 'project');
const project = await initKtxProject({ projectDir });
project.config.connections.warehouse = { driver: 'postgres' };
await project.fileStore.writeFile(
'semantic-layer/warehouse/orders.yaml',
`name: orders
table: public.orders
grain: [id]
columns:
- name: id
type: number
measures:
- name: order_count
expr: count(*)
joins: []
`,
'ktx',
'ktx@example.com',
'Add orders source',
);
const stdout = { write: vi.fn() };
const stderr = { write: vi.fn() };
const loadProject = vi.fn(async () => project);
const createSemanticLayerCompute = vi.fn(() => ({
query: vi.fn(async () => ({
sql: 'select count(*) as order_count from public.orders',
dialect: 'postgres',
columns: [{ name: 'orders.order_count' }],
plan: {},
})),
validateSources: vi.fn(),
generateSources: vi.fn(),
}));
await expect(
runKtxSl(
{
command: 'query',
projectDir: '/tmp/project',
connectionId: 'warehouse',
query: { measures: ['orders.order_count'], dimensions: [] },
format: 'sql',
execute: false,
cliVersion: '0.2.0',
runtimeInstallPolicy: 'auto',
},
{ stdout, stderr },
{ loadProject, createSemanticLayerCompute },
),
).resolves.toBe(0);
expect(stdout.write).toHaveBeenCalledWith('select count(*) as order_count from public.orders\n');
expect(stderr.write).not.toHaveBeenCalled();
});
it('runs sl query from a JSON query file', async () => {
const projectDir = join(tempDir, 'project');
const project = await initKtxProject({ projectDir });
project.config.connections.warehouse = { driver: 'postgres' };
await project.fileStore.writeFile(
'semantic-layer/warehouse/orders.yaml',
`name: orders
table: public.orders
grain: [id]
columns:
- name: id
type: number
measures:
- name: order_count
expr: count(*)
joins: []
`,
'ktx',
'ktx@example.com',
'Add orders source',
);
const queryFile = join(tempDir, 'query.json');
await writeFile(queryFile, '{"measures":["orders.order_count"],"dimensions":[]}', 'utf-8');
const stdout = { write: vi.fn() };
const stderr = { write: vi.fn() };
const query = vi.fn(async () => ({
sql: 'select count(*) as order_count from public.orders',
dialect: 'postgres',
columns: [{ name: 'orders.order_count' }],
plan: {},
}));
const createSemanticLayerCompute = vi.fn(() => ({
query,
validateSources: vi.fn(),
generateSources: vi.fn(),
}));
await expect(
runKtxSl(
{
command: 'query',
projectDir,
connectionId: 'warehouse',
queryFile,
format: 'json',
execute: false,
cliVersion: '0.2.0',
runtimeInstallPolicy: 'auto',
},
{ stdout, stderr },
{ createSemanticLayerCompute },
),
).resolves.toBe(0);
expect(query).toHaveBeenCalledWith(
expect.objectContaining({
query: { measures: ['orders.order_count'], dimensions: [] },
}),
);
expect(JSON.parse(String(stdout.write.mock.calls[0][0]))).toMatchObject({
sql: 'select count(*) as order_count from public.orders',
plan: { execution: { mode: 'compile_only' } },
});
expect(stderr.write).not.toHaveBeenCalled();
});
it('creates default sl query compute through the managed runtime helper', async () => {
const projectDir = join(tempDir, 'project');
const project = await initKtxProject({ projectDir });
project.config.connections.warehouse = { driver: 'postgres' };
await project.fileStore.writeFile(
'semantic-layer/warehouse/orders.yaml',
`name: orders
table: public.orders
grain: [id]
columns:
- name: id
type: number
measures:
- name: order_count
expr: count(*)
joins: []
`,
'ktx',
'ktx@example.com',
'Add orders source',
);
const stdout = { write: vi.fn() };
const stderr = { write: vi.fn() };
const compute = {
query: vi.fn(async () => ({
sql: 'select count(*) as order_count from public.orders',
dialect: 'postgres',
columns: [{ name: 'orders.order_count' }],
plan: {},
})),
validateSources: vi.fn(),
generateSources: vi.fn(),
};
const createManagedSemanticLayerCompute = vi.fn(async () => compute);
await expect(
runKtxSl(
{
command: 'query',
projectDir,
connectionId: 'warehouse',
query: { measures: ['orders.order_count'], dimensions: [] },
format: 'sql',
execute: false,
cliVersion: '0.2.0',
runtimeInstallPolicy: 'auto',
},
{ stdout, stderr },
{ createManagedSemanticLayerCompute },
),
).resolves.toBe(0);
expect(createManagedSemanticLayerCompute).toHaveBeenCalledWith({
cliVersion: '0.2.0',
installPolicy: 'auto',
io: { stdout, stderr },
});
expect(stdout.write).toHaveBeenCalledWith('select count(*) as order_count from public.orders\n');
});
it('executes sl query through the injected query executor', async () => {
const projectDir = join(tempDir, 'project');
const project = await initKtxProject({ projectDir });
project.config.connections.warehouse = { driver: 'postgres', url: 'postgres://example/db' };
await project.fileStore.writeFile(
'semantic-layer/warehouse/orders.yaml',
`name: orders
table: public.orders
grain: [id]
columns:
- name: id
type: number
measures:
- name: order_count
expr: count(*)
joins: []
`,
'ktx',
'ktx@example.com',
'Add orders source',
);
const stdout = { write: vi.fn() };
const stderr = { write: vi.fn() };
const loadProject = vi.fn(async () => project);
const queryExecutor = {
execute: vi.fn(async () => ({
headers: ['orders.order_count'],
rows: [[4]],
totalRows: 1,
command: 'SELECT',
rowCount: 1,
})),
};
const createSemanticLayerCompute = vi.fn(() => ({
query: vi.fn(async () => ({
sql: 'select count(*) as order_count from public.orders',
dialect: 'postgres',
columns: [{ name: 'orders.order_count' }],
plan: {},
})),
validateSources: vi.fn(),
generateSources: vi.fn(),
}));
await expect(
runKtxSl(
{
command: 'query',
projectDir,
connectionId: 'warehouse',
query: { measures: ['orders.order_count'], dimensions: [] },
format: 'json',
execute: true,
maxRows: 20,
cliVersion: '0.2.0',
runtimeInstallPolicy: 'auto',
},
{ stdout, stderr },
{
loadProject,
createSemanticLayerCompute,
createQueryExecutor: () => queryExecutor,
},
),
).resolves.toBe(0);
expect(queryExecutor.execute).toHaveBeenCalledWith(
expect.objectContaining({
connectionId: 'warehouse',
maxRows: 20,
}),
);
expect(JSON.parse(String(stdout.write.mock.calls[0][0]))).toMatchObject({
rows: [[4]],
totalRows: 1,
plan: {
execution: {
mode: 'executed',
},
},
});
expect(stderr.write).not.toHaveBeenCalled();
});
it('executes sl query against a local SQLite connection through the default executor', async () => {
const projectDir = join(tempDir, 'project');
const project = await initKtxProject({ projectDir });
const dbPath = join(projectDir, 'warehouse.db');
const db = new Database(dbPath);
db.exec(`
CREATE TABLE orders (
id INTEGER PRIMARY KEY,
status TEXT NOT NULL
);
INSERT INTO orders (status) VALUES ('paid'), ('paid'), ('open');
`);
db.close();
project.config.connections.warehouse = { driver: 'sqlite', path: 'warehouse.db' };
await writeFile(
join(projectDir, 'ktx.yaml'),
[
'connections:',
' warehouse:',
' driver: sqlite',
' path: warehouse.db',
'',
].join('\n'),
'utf-8',
);
await project.fileStore.writeFile(
'semantic-layer/warehouse/orders.yaml',
`name: orders
table: orders
grain: [id]
columns:
- name: id
type: number
- name: status
type: string
measures:
- name: order_count
expr: count(*)
joins: []
`,
'ktx',
'ktx@example.com',
'Add orders source',
);
const stdout = { write: vi.fn() };
const stderr = { write: vi.fn() };
const createSemanticLayerCompute = vi.fn(() => ({
query: vi.fn(async () => ({
sql: 'select count(*) as order_count from orders',
dialect: 'sqlite',
columns: [{ name: 'orders.order_count' }],
plan: {},
})),
validateSources: vi.fn(),
generateSources: vi.fn(),
}));
const exitCode = await runKtxSl(
{
command: 'query',
projectDir,
connectionId: 'warehouse',
query: { measures: ['orders.order_count'], dimensions: [] },
format: 'json',
execute: true,
maxRows: 20,
cliVersion: '0.2.0',
runtimeInstallPolicy: 'auto',
},
{ stdout, stderr },
{ createSemanticLayerCompute },
);
expect(stderr.write).not.toHaveBeenCalled();
expect(exitCode).toBe(0);
expect(JSON.parse(String(stdout.write.mock.calls[0][0]))).toMatchObject({
connectionId: 'warehouse',
dialect: 'sqlite',
rows: [[3]],
totalRows: 1,
plan: {
execution: {
mode: 'executed',
driver: 'sqlite',
maxRows: 20,
rowCount: 1,
},
},
});
});
it('emits sl list as a JSON envelope when output=json', async () => {
const projectDir = join(tempDir, 'project');
await seedSlSource({ projectDir });
const listIo = makeIo();
const code = await runKtxSl(
{ command: 'list', projectDir, connectionId: 'warehouse', output: 'json', cliVersion: '0.0.0-test' },
listIo.io,
);
expect(code).toBe(0);
expect(listIo.stderr()).toBe('');
const parsed = JSON.parse(listIo.stdout());
expect(parsed).toMatchObject({
kind: 'list',
data: {
items: expect.any(Array),
},
meta: {
command: 'sl list',
},
});
expect(parsed.data.items).toHaveLength(1);
expect(parsed.data.items[0]).toMatchObject({
connectionId: 'warehouse',
name: 'orders',
columnCount: 1,
measureCount: 0,
joinCount: 0,
});
});
it('search prints embeddings status when results are empty', async () => {
const stderr: string[] = [];
const io = {
stdout: { write: (_chunk: string) => {} },
stderr: {
write: (chunk: string) => {
stderr.push(chunk);
},
},
};
const projectDir = join(tempDir, 'empty-status');
const project = await initKtxProject({ projectDir });
await expect(
runKtxSl(
{
command: 'search',
projectDir: project.projectDir,
query: 'nope',
cliVersion: '0.5.0',
},
io,
{
loadProject: async () => project,
resolveEmbeddingProvider: async () => ({
kind: 'managed-unavailable',
reason: 'managed embeddings daemon is not running',
}),
searchLocalSlSources: async () => [],
},
),
).resolves.toBe(0);
expect(stderr.join('')).toMatch(/embeddings: unavailable/);
expect(stderr.join('')).toMatch(/managed embeddings daemon is not running/);
});
it('passes a managed-daemon-backed embedding service into the search', async () => {
const projectDir = join(tempDir, 'resolver-project');
const project = await initKtxProject({ projectDir });
const search = vi.fn(async () => []);
const searchIo = makeIo();
await expect(
runKtxSl(
{
command: 'search',
projectDir: project.projectDir,
query: 'income',
cliVersion: '0.5.0',
json: true,
},
searchIo.io,
{
loadProject: async () => project,
resolveEmbeddingProvider: async () => ({
kind: 'managed-running',
provider: { id: 'fake' } as never,
baseUrl: 'http://127.0.0.1:51234',
}),
searchLocalSlSources: search,
},
),
).resolves.toBe(0);
expect(search).toHaveBeenCalledWith(
project,
expect.objectContaining({ embeddingService: expect.any(Object) }),
);
});
it('emits sl list with grouping and Clack-style framing when output=pretty', async () => {
const projectDir = join(tempDir, 'project');
await seedSlSource({ projectDir });
const listIo = makeIo();
const code = await runKtxSl(
{ command: 'list', projectDir, connectionId: 'warehouse', output: 'pretty', cliVersion: '0.0.0-test' },
listIo.io,
);
expect(code).toBe(0);
const stripAnsi = (s: string) => s.replace(/\[[0-9;]*m/g, '');
const out = stripAnsi(listIo.stdout());
expect(out).toContain('sl list');
expect(out).toContain('warehouse');
expect(out).toContain('orders');
expect(out).toContain('1 source');
});
});