// Mirror of https://github.com/Kaelio/ktx.git (synced 2026-06-16 08:25:14 +02:00).
// 926 lines, 33 KiB, TypeScript.
import { execFile } from 'node:child_process';
|
|
import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
|
|
import { tmpdir } from 'node:os';
|
|
import { join, resolve } from 'node:path';
|
|
import { promisify } from 'node:util';
|
|
import { parseKtxProjectConfig } from '@ktx/context/project';
|
|
import { Client } from '@modelcontextprotocol/sdk/client/index.js';
|
|
import { StdioClientTransport } from '@modelcontextprotocol/sdk/client/stdio.js';
|
|
import Database from 'better-sqlite3';
|
|
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
|
|
|
/** Promise-returning wrapper around `child_process.execFile`. */
const execFileAsync = promisify(execFile);

/** Absolute path to the built CLI entry point, resolved against the current working directory. */
const CLI_BIN = resolve(process.cwd(), 'dist/bin.js');
|
|
|
|
/** Outcome of one CLI invocation: exit code plus the captured output streams. */
interface CliResult {
  /** Process exit code (0 on success). */
  code: number;
  /** Everything the process wrote to stdout. */
  stdout: string;
  /** Everything the process wrote to stderr. */
  stderr: string;
}
|
|
|
|
/**
 * Shape of the error a rejected `execFile` promise carries.
 *
 * All extra fields are optional because the rejection may not populate every one of them.
 */
interface ExecFailure extends Error {
  /**
   * Exit status when numeric. Node may also report a string error code (for example
   * when the process could not be spawned) or `null` when the child was killed by a
   * signal, so consumers must check `typeof code === 'number'` before using it as an exit code.
   */
  code?: number | string | null;
  /** Captured stdout, when available. */
  stdout?: string;
  /** Captured stderr, when available. */
  stderr?: string;
}
|
|
|
|
/**
 * Type guard for errors produced by a failed child-process execution.
 *
 * @param error - Any caught value.
 * @returns `true` when `error` is an `Error` instance that has at least one of the
 *   `stdout`, `stderr`, or `code` properties.
 */
function isExecFailure(error: unknown): error is ExecFailure {
  return error instanceof Error && ('stdout' in error || 'stderr' in error || 'code' in error);
}
|
|
|
|
/**
 * Runs the built CLI (`CLI_BIN`) with the current Node executable and captures its result.
 *
 * A failing process does not reject: the failure is folded into the returned
 * {@link CliResult} so tests can assert on exit code and output directly.
 *
 * @param args - Arguments passed to the CLI after the script path.
 * @param options - Optional settings; `env` replaces the child's environment when provided.
 * @returns Exit code, stdout, and stderr of the invocation.
 * @throws Re-throws any caught value that is not an exec failure.
 */
async function runBuiltCli(args: string[], options: { env?: NodeJS.ProcessEnv } = {}): Promise<CliResult> {
  try {
    const result = await execFileAsync(process.execPath, [CLI_BIN, ...args], {
      encoding: 'utf8',
      timeout: 20_000,
      // Only pass `env` when the caller supplied one, so the default environment is inherited otherwise.
      ...(options.env ? { env: options.env } : {}),
    });
    return {
      code: 0,
      stdout: result.stdout,
      stderr: result.stderr,
    };
  } catch (error) {
    if (!isExecFailure(error)) {
      throw error;
    }
    return {
      // A non-numeric code is normalised to a generic failure status.
      code: typeof error.code === 'number' ? error.code : 1,
      stdout: error.stdout ?? '',
      // `??` (not `||`) on purpose: an empty captured stderr is preserved as-is.
      stderr: error.stderr ?? error.message,
    };
  }
}
|
|
|
|
/**
 * Extracts the run id from CLI output containing a line of the form `Run: <id>`.
 *
 * @param stdout - Captured CLI stdout.
 * @returns The text following `Run: ` on the first matching line.
 * @throws Error when no such line exists; the message includes the full output.
 */
function getRunId(stdout: string): string {
  // Narrow the capture group explicitly instead of indexing the match unchecked.
  const runId = /^Run: (.+)$/m.exec(stdout)?.[1];
  if (!runId) {
    throw new Error(`Could not find run id in output:\n${stdout}`);
  }
  return runId;
}
|
|
|
|
/**
 * Reads the `structuredContent` field of a tool-call result and asserts that it is defined.
 *
 * The value is cast to `T` without validation; callers assert on the fields they need.
 *
 * @param result - Value returned by a tool call.
 * @returns The `structuredContent` value typed as `T`.
 */
function structuredContent<T extends object>(result: unknown): T {
  // Optional chaining lets a null/undefined result fail the expectation below
  // instead of throwing a TypeError on property access.
  const content = (result as { structuredContent?: unknown } | null | undefined)?.structuredContent;
  expect(content).toBeDefined();
  return content as T;
}
|
|
|
|
/**
 * Writes a `ktx.yaml` into `projectDir` declaring a `warehouse` connection with the
 * `postgres` driver and the `fake` ingest adapter. An existing file is overwritten.
 *
 * @param projectDir - Directory that receives `ktx.yaml`; it must already exist.
 */
async function writeWarehouseConfig(projectDir: string): Promise<void> {
  // Indentation is significant: `driver` must be nested under the `warehouse` connection.
  const yaml = [
    'project: warehouse',
    'connections:',
    '  warehouse:',
    '    driver: postgres',
    'ingest:',
    '  adapters:',
    '    - fake',
    '',
  ].join('\n');
  await writeFile(join(projectDir, 'ktx.yaml'), yaml, 'utf-8');
}
|
|
|
|
/**
 * Creates `<sourceDir>/orders/orders.json` containing a single JSON object named `orders`.
 * Missing parent directories are created.
 *
 * @param sourceDir - Root directory of the source fixture.
 */
async function writeSourceFixture(sourceDir: string): Promise<void> {
  const ordersDir = join(sourceDir, 'orders');
  await mkdir(ordersDir, { recursive: true });
  await writeFile(join(ordersDir, 'orders.json'), '{"name":"orders"}\n', 'utf-8');
}
|
|
|
|
/**
 * Creates a small SQLite database at `dbPath` with `customers` and `orders` tables,
 * a foreign key from `orders.customer_id` to `customers.id`, and two rows in each table.
 *
 * The connection is always closed, even when the setup script fails.
 *
 * @param dbPath - File path of the database to create.
 */
function createSqliteWarehouse(dbPath: string): void {
  const db = new Database(dbPath);
  try {
    db.exec(`
      PRAGMA foreign_keys = ON;
      CREATE TABLE customers (
        id INTEGER PRIMARY KEY,
        name TEXT NOT NULL
      );
      CREATE TABLE orders (
        id INTEGER PRIMARY KEY,
        customer_id INTEGER NOT NULL,
        total NUMERIC,
        created_at TEXT,
        FOREIGN KEY(customer_id) REFERENCES customers(id)
      );
      INSERT INTO customers (id, name) VALUES (1, 'Ada'), (2, 'Grace');
      INSERT INTO orders (id, customer_id, total, created_at)
      VALUES (10, 1, 42.5, '2026-04-28'), (11, 2, 9.5, '2026-04-29');
    `);
  } finally {
    db.close();
  }
}
|
|
|
|
/**
 * Writes a `ktx.yaml` into `projectDir` declaring a read-only SQLite `warehouse`
 * connection and the `live-database` ingest adapter. An existing file is overwritten.
 *
 * @param projectDir - Directory that receives `ktx.yaml`; it must already exist.
 * @param dbPath - Path of the SQLite database file; written as a JSON-quoted scalar.
 * @param enrich - When `true`, also appends a `scan.enrichment` section selecting the
 *   `deterministic` mode and a `deterministic` embeddings backend with 6 dimensions.
 */
async function writeSqliteScanConfig(projectDir: string, dbPath: string, enrich = false): Promise<void> {
  // Indentation is significant: connection fields nest under `warehouse`, and the
  // embeddings fields nest under `scan.enrichment.embeddings`.
  const enrichmentLines = enrich
    ? [
        'scan:',
        '  enrichment:',
        '    mode: deterministic',
        '    embeddings:',
        '      backend: deterministic',
        '      dimensions: 6',
      ]
    : [];
  const yaml = [
    'project: warehouse',
    'connections:',
    '  warehouse:',
    '    driver: sqlite',
    // JSON.stringify yields a double-quoted string, which keeps unusual path characters intact.
    `    path: ${JSON.stringify(dbPath)}`,
    '    readonly: true',
    'ingest:',
    '  adapters:',
    '    - live-database',
    ...enrichmentLines,
    '',
  ].join('\n');
  await writeFile(join(projectDir, 'ktx.yaml'), yaml, 'utf-8');
}
|
|
|
|
/**
 * Parses CLI output as JSON and types the result as `T`.
 *
 * The cast is not validated; callers assert on the fields they rely on.
 *
 * @param stdout - Text expected to contain exactly one JSON document.
 * @returns The parsed value typed as `T`.
 * @throws SyntaxError when the text is not valid JSON; the message includes the
 *   parser's reason and the offending output so a failing test is diagnosable.
 */
function parseJsonOutput<T>(stdout: string): T {
  try {
    return JSON.parse(stdout) as T;
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    throw new SyntaxError(`Expected JSON output but could not parse it (${reason}):\n${stdout}`);
  }
}
|
|
|
|
/**
 * Runs `ktx setup --new` non-interactively for `projectDir`, passing every `--skip-*`
 * flag (LLM, embeddings, databases, sources, agents).
 *
 * @param projectDir - Directory passed as `--project-dir`.
 * @returns The CLI result of the setup invocation.
 */
async function runSetupNewProject(projectDir: string): Promise<CliResult> {
  const setupArgs = [
    'setup',
    '--project-dir',
    projectDir,
    '--new',
    '--no-input',
    '--yes',
    '--skip-llm',
    '--skip-embeddings',
    '--skip-databases',
    '--skip-sources',
    '--skip-agents',
  ];
  return await runBuiltCli(setupArgs);
}
|
|
|
|
describe('standalone built ktx CLI smoke', () => {
|
|
// Scratch directory for the current test: created before each test and removed after it.
let tempDir: string;

beforeEach(async () => {
  tempDir = await mkdtemp(join(tmpdir(), 'ktx-standalone-smoke-'));
});

afterEach(async () => {
  // `force` keeps cleanup from failing when the directory is already gone.
  await rm(tempDir, { recursive: true, force: true });
});
|
|
|
|
// `dev ingest run` on a freshly set-up project with no LLM provider configured must
// exit 1, print nothing on stdout, and explain the missing configuration on stderr.
it('reports missing local ingest LLM config through the built binary', async () => {
  const projectDir = join(tempDir, 'project');
  const sourceDir = join(tempDir, 'source');

  const init = await runSetupNewProject(projectDir);
  expect(init).toMatchObject({ code: 0, stderr: '' });
  expect(init.stdout).toContain(`Project: ${projectDir}`);

  await writeWarehouseConfig(projectDir);
  await writeSourceFixture(sourceDir);

  const run = await runBuiltCli([
    'dev',
    'ingest',
    'run',
    '--project-dir',
    projectDir,
    '--connection-id',
    'warehouse',
    '--adapter',
    'fake',
    '--source-dir',
    sourceDir,
  ]);
  expect(run).toMatchObject({ code: 1, stdout: '' });
  expect(run.stderr).toContain(
    'ktx dev ingest run requires llm.provider.backend: anthropic, vertex, or gateway, or an injected agentRunner',
  );
});
|
|
|
|
// `setup demo` with no explicit mode and a blank ANTHROPIC_API_KEY must succeed in
// seeded mode and print the summary sections asserted below.
it('runs the default pre-seeded demo without credentials', async () => {
  const result = await runBuiltCli(
    ['setup', 'demo', '--project-dir', join(tempDir, 'demo-project'), '--plain', '--no-input'],
    {
      env: { ...process.env, ANTHROPIC_API_KEY: '' },
    },
  );

  expect(result).toMatchObject({ code: 0, stderr: '' });
  expect(result.stdout).toContain('Mode: seeded');
  expect(result.stdout).toContain('Source: packaged demo project');
  expect(result.stdout).toContain('LLM calls: none');
  expect(result.stdout).toContain('Warehouse:');
  expect(result.stdout).toContain('dbt:');
  expect(result.stdout).toContain('BI:');
  expect(result.stdout).toContain('Notion:');
  expect(result.stdout).toContain('Semantic-layer sources:');
  expect(result.stdout).toContain('Knowledge pages:');
  expect(result.stdout).toContain('ktx serve --mcp stdio');
  // The forbidden text is assembled from parts rather than written as one literal.
  expect(result.stdout).not.toContain(['--mode', 'deterministic'].join(' '));
});
|
|
|
|
// Seeds the demo project, then checks that `agent wiki search` and `agent sl list --query`
// both return JSON with at least one hit that carries match reasons.
it('runs hybrid agent search against the seeded demo through the built binary', async () => {
  const projectDir = join(tempDir, 'seeded-hybrid-search-project');

  const seeded = await runBuiltCli(['setup', 'demo', '--project-dir', projectDir, '--plain', '--no-input'], {
    env: { ...process.env, ANTHROPIC_API_KEY: '' },
  });
  expect(seeded).toMatchObject({ code: 0, stderr: '' });
  expect(seeded.stdout).toContain('Mode: seeded');

  const wikiSearch = await runBuiltCli([
    'agent',
    'wiki',
    'search',
    'ARR contract',
    '--json',
    '--limit',
    '5',
    '--project-dir',
    projectDir,
  ]);
  expect(wikiSearch).toMatchObject({ code: 0, stderr: '' });
  const wikiJson = parseJsonOutput<{
    results: Array<{ key: string; score: number; matchReasons?: string[] }>;
    totalFound: number;
  }>(wikiSearch.stdout);
  expect(wikiJson.totalFound).toBeGreaterThan(0);
  expect(wikiJson.results.some((result) => result.matchReasons?.length)).toBe(true);

  const slSearch = await runBuiltCli([
    'agent',
    'sl',
    'list',
    '--json',
    '--query',
    'ARR',
    '--project-dir',
    projectDir,
  ]);
  expect(slSearch).toMatchObject({ code: 0, stderr: '' });
  const slJson = parseJsonOutput<{
    sources: Array<{ connectionId: string; name: string; score?: number; matchReasons?: string[] }>;
    totalSources: number;
  }>(slSearch.stdout);
  expect(slJson.totalSources).toBeGreaterThan(0);
  expect(slJson.sources.some((source) => source.matchReasons?.length)).toBe(true);
});
|
|
|
|
// Running `agent sl list --json --query` in an empty directory must exit 1 with empty
// stdout and a JSON error document on stderr that lists the next steps.
it('prints guided JSON for agent semantic-layer search outside a project through the built binary', async () => {
  const projectDir = join(tempDir, 'missing-search-project');
  await mkdir(projectDir, { recursive: true });

  const result = await runBuiltCli([
    'agent',
    'sl',
    'list',
    '--json',
    '--query',
    'revenue',
    '--project-dir',
    projectDir,
  ]);

  expect(result.code).toBe(1);
  expect(result.stdout).toBe('');
  const errorJson = parseJsonOutput<{
    ok: false;
    error: { code: string; message: string; nextSteps: string[] };
  }>(result.stderr);
  expect(errorJson).toEqual({
    ok: false,
    error: {
      code: 'agent_sl_search_missing_project',
      message: `Semantic-layer search needs an initialized KTX project at ${projectDir}.`,
      nextSteps: [
        'ktx demo',
        `ktx setup --project-dir ${projectDir}`,
        'ktx ingest <connection>',
        `ktx agent sl list --json --query "revenue" --project-dir ${projectDir}`,
      ],
    },
  });
});
|
|
|
|
// Seeds the demo explicitly with `--mode seeded`, then checks the `setup demo inspect` summary.
it('runs the pre-seeded demo and inspect without credentials', async () => {
  const projectDir = join(tempDir, 'seeded-demo-project');
  // Blank the key so the test really runs "without credentials" even when the
  // ambient environment has one, matching the other credential-free tests in this suite.
  const env = { ...process.env, ANTHROPIC_API_KEY: '' };

  const seeded = await runBuiltCli(
    ['setup', 'demo', '--mode', 'seeded', '--project-dir', projectDir, '--no-input'],
    { env },
  );
  expect(seeded.code).toBe(0);
  expect(seeded.stdout).toContain('Mode: seeded');
  expect(seeded.stdout).toContain('LLM calls: none');
  expect(seeded.stdout).toContain('Semantic-layer sources:');
  expect(seeded.stdout).toContain('Knowledge pages:');

  const inspect = await runBuiltCli(['setup', 'demo', 'inspect', '--project-dir', projectDir, '--no-input'], { env });
  expect(inspect).toMatchObject({ code: 0, stderr: '' });
  expect(inspect.stdout).toContain('Mode: seeded');
  expect(inspect.stdout).toContain('Status: ready');
  expect(inspect.stdout).toContain('Warehouse: 8 tables, 11,234 rows');
  expect(inspect.stdout).toContain('Rows: accounts 210, arr_movements 720');
  expect(inspect.stdout).toContain('dbt: 3 models, 8 source tables');
  expect(inspect.stdout).toContain('BI: 5 explores, 2 dashboards');
  expect(inspect.stdout).toContain('Notion: 8 pages');
  expect(inspect.stdout).toContain('Semantic-layer sources:');
  expect(inspect.stdout).toContain('Knowledge pages:');
  expect(inspect.stdout).toContain('Evidence links:');
  expect(inspect.stdout).toContain('Report: reports/seeded-demo-report.json');
  expect(inspect.stdout).toContain('Replay: replays/replay.memory-flow.v1.json');
  expect(inspect.stdout).toContain('Latest replay: seeded (packaged, prebuilt)');
  expect(inspect.stdout).toContain('ktx agent tools --json');
  expect(inspect.stdout).toContain('ktx agent context --json');
  expect(inspect.stdout).not.toContain('ktx ask "your question here"');
  expect(inspect.stdout).toContain('ktx serve --mcp stdio');
});
|
|
|
|
// Seeds the demo, starts `ktx serve --mcp stdio` as a child process, and exercises the
// knowledge and semantic-layer tools through an MCP client.
it('serves seeded demo wiki and semantic-layer context over stdio MCP', async () => {
  const projectDir = join(tempDir, 'seeded-mcp-project');

  const seeded = await runBuiltCli(
    ['setup', 'demo', '--mode', 'seeded', '--project-dir', projectDir, '--plain', '--no-input'],
    {
      env: { ...process.env, ANTHROPIC_API_KEY: '' },
    },
  );
  expect(seeded).toMatchObject({ code: 0, stderr: '' });
  expect(seeded.stdout).toContain('Mode: seeded');

  const client = new Client({ name: 'ktx-seeded-demo-smoke-client', version: '0.0.0' });
  const transport = new StdioClientTransport({
    command: process.execPath,
    args: [CLI_BIN, 'serve', '--mcp', 'stdio', '--project-dir', projectDir, '--user-id', 'smoke-user'],
    stderr: 'pipe',
  });

  try {
    await client.connect(transport);
    const toolNames = (await client.listTools()).tools.map((tool) => tool.name).sort();
    expect(toolNames).toEqual(
      expect.arrayContaining(['knowledge_read', 'knowledge_search', 'sl_read_source', 'sl_validate']),
    );

    const knowledgeSearch = structuredContent<{
      results: Array<{ key: string; summary: string; score: number }>;
      totalFound: number;
    }>(await client.callTool({ name: 'knowledge_search', arguments: { query: 'ARR contract-first definition', limit: 10 } }));
    expect(knowledgeSearch.totalFound).toBeGreaterThan(0);
    expect(knowledgeSearch.results.map((result) => result.key)).toContain('orbit-arr-contract-first-definition');

    const knowledgeRead = structuredContent<{
      key: string;
      summary: string;
      content: string;
      tags: string[];
      slRefs: string[];
    }>(await client.callTool({ name: 'knowledge_read', arguments: { key: 'orbit-arr-contract-first-definition' } }));
    expect(knowledgeRead.key).toBe('orbit-arr-contract-first-definition');
    expect(knowledgeRead.summary).toContain('ARR');
    expect(knowledgeRead.content).toContain('contract');
    expect(knowledgeRead.slRefs).toContain('mart_arr_daily');

    const slRead = structuredContent<{ sourceName: string; yaml: string }>(
      await client.callTool({
        name: 'sl_read_source',
        arguments: { connectionId: 'dbt-main', sourceName: 'mart_arr_daily' },
      }),
    );
    expect(slRead.sourceName).toBe('mart_arr_daily');
    expect(slRead.yaml).toContain('name: mart_arr_daily');
    expect(slRead.yaml).toContain('measures:');

    const slValidate = structuredContent<{ success: boolean; errors: string[]; warnings: string[] }>(
      await client.callTool({
        name: 'sl_validate',
        arguments: { connectionId: 'dbt-main', names: ['mart_arr_daily', 'stg_contracts'] },
      }),
    );
    expect(slValidate.success).toBe(true);
    expect(slValidate.errors).toEqual([]);
  } finally {
    // Always close the client, whether or not an assertion failed.
    await client.close();
  }
});
|
|
|
|
// `dev doctor setup` must print its report headings with a clean stderr. Both exit
// codes 0 and 1 are accepted.
it('runs doctor setup through the built binary', async () => {
  const result = await runBuiltCli(['dev', 'doctor', 'setup', '--no-input']);

  expect(result.stdout).toContain('KTX setup doctor');
  expect(result.stdout).toContain('Node 22+');
  expect(result.stdout).toContain('Workspace-local CLI');
  expect(result.stderr).toBe('');
  expect([0, 1]).toContain(result.code);
});
|
|
|
|
// `setup demo --mode full` with a blank ANTHROPIC_API_KEY must exit 1 and point the
// user at the seeded mode as an alternative.
it('reports missing Anthropic credentials for full demo through the built binary', async () => {
  const projectDir = join(tempDir, 'full-demo-missing-key');

  const result = await runBuiltCli(['setup', 'demo', '--mode', 'full', '--project-dir', projectDir, '--no-input'], {
    env: { ...process.env, ANTHROPIC_API_KEY: '' },
  });

  expect(result.code).toBe(1);
  expect(result.stderr).toContain('ktx setup demo --mode full needs ANTHROPIC_API_KEY');
  expect(result.stderr).toContain('ktx setup demo --mode seeded --no-input');
});
|
|
|
|
// `setup demo reset` must refuse to run without `--force` and succeed with it.
it('requires force for demo reset through the built binary', async () => {
  const projectDir = join(tempDir, 'reset-demo-project');

  const init = await runBuiltCli(['setup', 'demo', 'init', '--project-dir', projectDir, '--no-input']);
  expect(init).toMatchObject({ code: 0, stderr: '' });

  const withoutForce = await runBuiltCli(['setup', 'demo', 'reset', '--project-dir', projectDir, '--no-input']);
  expect(withoutForce.code).toBe(1);
  expect(withoutForce.stderr).toContain(
    `ktx setup demo reset is destructive; pass --force to recreate ${projectDir}`,
  );

  const withForce = await runBuiltCli([
    'setup',
    'demo',
    'reset',
    '--project-dir',
    projectDir,
    '--force',
    '--no-input',
  ]);
  expect(withForce).toMatchObject({ code: 0, stderr: '' });
  expect(withForce.stdout).toContain(`Demo project reset: ${projectDir}`);
});
|
|
|
|
// After `demo.db` is deleted from an initialised demo project, replay mode must exit 1,
// name the missing file, and suggest the reset command.
it('reports corrupted demo state with reset guidance through the built binary', async () => {
  const projectDir = join(tempDir, 'corrupt-demo-project');

  const init = await runBuiltCli(['setup', 'demo', 'init', '--project-dir', projectDir, '--no-input']);
  expect(init).toMatchObject({ code: 0, stderr: '' });
  await rm(join(projectDir, 'demo.db'), { force: true });

  const replay = await runBuiltCli(['setup', 'demo', '--mode', 'replay', '--project-dir', projectDir, '--no-input']);
  expect(replay.code).toBe(1);
  expect(replay.stderr).toContain(`Demo project is not ready at ${projectDir}: missing demo.db`);
  expect(replay.stderr).toContain(`ktx setup demo reset --project-dir ${projectDir} --force --no-input`);
});
|
|
|
|
// `setup demo doctor` on an initialised demo project must print its report headings
// with a clean stderr. Both exit codes 0 and 1 are accepted.
it('runs demo doctor through the built binary', async () => {
  const projectDir = join(tempDir, 'doctor-demo-project');

  const init = await runBuiltCli(['setup', 'demo', 'init', '--project-dir', projectDir, '--no-input']);
  expect(init).toMatchObject({ code: 0, stderr: '' });

  const result = await runBuiltCli(['setup', 'demo', 'doctor', '--project-dir', projectDir, '--no-input']);
  expect(result.stdout).toContain('KTX demo doctor');
  expect(result.stdout).toContain('Demo dataset');
  expect(result.stdout).toContain('Demo replay');
  expect(result.stdout).toContain('Demo LLM provider');
  expect(result.stderr).toBe('');
  expect([0, 1]).toContain(result.code);
});
|
|
|
|
// `setup demo ingest --mode seeded` must succeed and report that no LLM calls were made.
it('runs demo ingest seeded mode through the built binary', async () => {
  const projectDir = join(tempDir, 'seeded-ingest-alias');

  const result = await runBuiltCli([
    'setup',
    'demo',
    'ingest',
    '--mode',
    'seeded',
    '--project-dir',
    projectDir,
    '--no-input',
  ]);

  expect(result.code).toBe(0);
  expect(result.stdout).toContain('Mode: seeded');
  expect(result.stdout).toContain('LLM calls: none');
});
|
|
|
|
// Scans a small SQLite warehouse three times and checks each run's JSON report:
//   1. structural mode,
//   2. enriched mode with no enrichment backend configured,
//   3. enriched mode after the config enables the deterministic backend.
// The test passes an explicit 30 s timeout as the last argument to `it`.
it('runs structural and enriched scans through the built binary with manifest artifacts', async () => {
  const projectDir = join(tempDir, 'scan-project');
  const init = await runSetupNewProject(projectDir);
  expect(init).toMatchObject({ code: 0, stderr: '' });

  const dbPath = join(projectDir, 'warehouse.db');
  createSqliteWarehouse(dbPath);
  await writeSqliteScanConfig(projectDir, dbPath);

  const connectionTest = await runBuiltCli(['connection', 'test', 'warehouse', '--project-dir', projectDir]);
  expect(connectionTest).toMatchObject({ code: 0, stderr: '' });
  expect(connectionTest.stdout).toContain('Connection test passed: warehouse');
  expect(connectionTest.stdout).toContain('Driver: sqlite');
  expect(connectionTest.stdout).toContain('Tables: 2');

  // --- 1. Structural scan ---
  const structural = await runBuiltCli(['dev', 'scan', 'warehouse', '--project-dir', projectDir]);
  expect(structural).toMatchObject({ code: 0, stderr: '' });
  expect(structural.stdout).toContain('Status: done');
  expect(structural.stdout).toContain('Mode: structural');
  const structuralRunId = getRunId(structural.stdout);

  const structuralReportResult = await runBuiltCli([
    'dev',
    'scan',
    'report',
    '--json',
    '--project-dir',
    projectDir,
    structuralRunId,
  ]);
  expect(structuralReportResult).toMatchObject({ code: 0, stderr: '' });
  const structuralReport = parseJsonOutput<{
    mode: string;
    artifactPaths: { manifestShards: string[]; enrichmentArtifacts: string[] };
    manifestShardsWritten: number;
  }>(structuralReportResult.stdout);
  expect(structuralReport.mode).toBe('structural');
  expect(structuralReport.artifactPaths.manifestShards).toEqual(['semantic-layer/warehouse/_schema/public.yaml']);
  expect(structuralReport.artifactPaths.enrichmentArtifacts).toEqual([]);
  expect(structuralReport.manifestShardsWritten).toBe(1);

  const structuralManifest = await readFile(
    join(projectDir, 'semantic-layer/warehouse/_schema/public.yaml'),
    'utf-8',
  );
  expect(structuralManifest).toContain('customers:');
  expect(structuralManifest).toContain('orders:');
  expect(structuralManifest).toContain('source: formal');
  expect(structuralManifest).not.toContain('ai:');

  // --- 2. Enriched scan without a configured enrichment backend ---
  const providerlessEnriched = await runBuiltCli([
    'dev',
    'scan',
    'warehouse',
    '--project-dir',
    projectDir,
    '--mode',
    'enriched',
  ]);
  expect(providerlessEnriched).toMatchObject({ code: 0, stderr: '' });
  expect(providerlessEnriched.stdout).toContain('Mode: enriched');
  expect(providerlessEnriched.stdout).toContain('Relationships');
  expect(providerlessEnriched.stdout).toContain('Accepted: 1');
  expect(providerlessEnriched.stdout).toContain('scan_enrichment_backend_not_configured');
  expect(providerlessEnriched.stdout).toContain('Enrichment artifacts: 3');
  const providerlessRunId = getRunId(providerlessEnriched.stdout);

  const providerlessReportResult = await runBuiltCli([
    'dev',
    'scan',
    'report',
    '--json',
    '--project-dir',
    projectDir,
    providerlessRunId,
  ]);
  expect(providerlessReportResult).toMatchObject({ code: 0, stderr: '' });
  const providerlessReport = parseJsonOutput<{
    mode: string;
    enrichment: {
      tableDescriptions: string;
      columnDescriptions: string;
      embeddings: string;
      deterministicRelationships: string;
      statisticalValidation: string;
    };
    relationships: { accepted: number; review: number; rejected: number; skipped: number };
    warnings: Array<{ code: string }>;
    artifactPaths: { enrichmentArtifacts: string[]; manifestShards: string[] };
  }>(providerlessReportResult.stdout);
  expect(providerlessReport.mode).toBe('enriched');
  expect(providerlessReport.enrichment).toMatchObject({
    tableDescriptions: 'skipped',
    columnDescriptions: 'skipped',
    embeddings: 'skipped',
    deterministicRelationships: 'completed',
    statisticalValidation: 'completed',
  });
  expect(providerlessReport.relationships).toEqual({ accepted: 1, review: 0, rejected: 0, skipped: 0 });
  expect(providerlessReport.warnings).toEqual(
    expect.arrayContaining([expect.objectContaining({ code: 'scan_enrichment_backend_not_configured' })]),
  );
  expect(providerlessReport.artifactPaths.enrichmentArtifacts).toEqual(
    expect.arrayContaining([
      expect.stringContaining('/enrichment/relationships.json'),
      expect.stringContaining('/enrichment/relationship-profile.json'),
      expect.stringContaining('/enrichment/relationship-diagnostics.json'),
    ]),
  );
  expect(providerlessReport.artifactPaths.manifestShards).toEqual(['semantic-layer/warehouse/_schema/public.yaml']);

  // --- 3. Enriched scan with the deterministic enrichment backend enabled ---
  await writeSqliteScanConfig(projectDir, dbPath, true);
  const enriched = await runBuiltCli(['dev', 'scan', 'warehouse', '--project-dir', projectDir, '--mode', 'enriched']);
  expect(enriched).toMatchObject({ code: 0, stderr: '' });
  expect(enriched.stdout).toContain('Mode: enriched');
  const enrichedRunId = getRunId(enriched.stdout);

  const enrichedReportResult = await runBuiltCli([
    'dev',
    'scan',
    'report',
    '--json',
    '--project-dir',
    projectDir,
    enrichedRunId,
  ]);
  expect(enrichedReportResult).toMatchObject({ code: 0, stderr: '' });
  const enrichedReport = parseJsonOutput<{
    mode: string;
    enrichment: { tableDescriptions: string; columnDescriptions: string; embeddings: string };
    artifactPaths: { enrichmentArtifacts: string[]; manifestShards: string[] };
  }>(enrichedReportResult.stdout);
  expect(enrichedReport.mode).toBe('enriched');
  expect(enrichedReport.enrichment).toMatchObject({
    tableDescriptions: 'completed',
    columnDescriptions: 'completed',
    embeddings: 'completed',
  });
  expect(enrichedReport.artifactPaths.enrichmentArtifacts).toEqual(
    expect.arrayContaining([
      expect.stringContaining('/enrichment/descriptions.json'),
      expect.stringContaining('/enrichment/embeddings.json'),
      expect.stringContaining('/enrichment/relationships.json'),
      expect.stringContaining('/enrichment/relationship-profile.json'),
      expect.stringContaining('/enrichment/relationship-diagnostics.json'),
    ]),
  );
  expect(enrichedReport.artifactPaths.manifestShards).toEqual(['semantic-layer/warehouse/_schema/public.yaml']);

  const enrichedManifest = await readFile(join(projectDir, 'semantic-layer/warehouse/_schema/public.yaml'), 'utf-8');
  expect(enrichedManifest).toContain('Deterministic description');
}, 30_000);
|
|
|
|
// Writes a `ktx.yaml` with a gateway LLM provider and OpenAI enrichment embeddings, then
// checks that `parseKtxProjectConfig` returns the matching nested structure. Only the
// parser is called here; the CLI binary is not invoked.
it('parses gateway LLM config and OpenAI enrichment embeddings used by standalone scans without network calls', async () => {
  const projectDir = join(tempDir, 'gateway-config-project');
  await mkdir(projectDir, { recursive: true });
  await writeFile(
    join(projectDir, 'ktx.yaml'),
    // Indentation is significant: the nesting below mirrors the objects asserted further down.
    [
      'project: gateway-smoke',
      'llm:',
      '  provider:',
      '    backend: gateway',
      '    gateway:',
      '      api_key: env:AI_GATEWAY_API_KEY', // pragma: allowlist secret
      '  models:',
      '    default: env:KTX_SCAN_LLM_MODEL',
      'scan:',
      '  enrichment:',
      '    mode: llm',
      '    embeddings:',
      '      backend: openai',
      '      model: env:KTX_SCAN_EMBEDDING_MODEL',
      '      dimensions: 1536',
      '      openai:',
      '        api_key: env:OPENAI_API_KEY', // pragma: allowlist secret
      '      batchSize: 16',
      '',
    ].join('\n'),
    'utf8',
  );

  const config = parseKtxProjectConfig(await readFile(join(projectDir, 'ktx.yaml'), 'utf8'));
  expect(config.llm).toEqual({
    provider: {
      backend: 'gateway',
      gateway: { api_key: 'env:AI_GATEWAY_API_KEY' }, // pragma: allowlist secret
    },
    models: { default: 'env:KTX_SCAN_LLM_MODEL' },
  });
  expect(config.scan.enrichment).toEqual({
    mode: 'llm',
    embeddings: {
      backend: 'openai',
      model: 'env:KTX_SCAN_EMBEDDING_MODEL',
      dimensions: 1536,
      openai: { api_key: 'env:OPENAI_API_KEY' }, // pragma: allowlist secret
      batchSize: 16,
    },
  });
});
|
|
|
|
// `connection add notion` must record the token as an environment reference in
// `ktx.yaml`; the written file must not contain the `ntn_` prefix.
it('adds a redacted Notion connection through the built binary', async () => {
  const projectDir = join(tempDir, 'notion-project');
  const init = await runSetupNewProject(projectDir);
  expect(init).toMatchObject({ code: 0, stderr: '' });

  const add = await runBuiltCli([
    'connection',
    'add',
    'notion',
    'notion-main',
    '--project-dir',
    projectDir,
    '--token-env',
    'NOTION_AUTH_TOKEN',
    '--crawl-mode',
    'all_accessible',
    '--max-pages',
    '5',
  ]);

  expect(add).toMatchObject({ code: 0, stderr: '' });
  expect(add.stdout).toContain('Connection: notion-main');
  expect(add.stdout).toContain('Driver: notion');

  const yaml = await readFile(join(projectDir, 'ktx.yaml'), 'utf-8');
  expect(yaml).toContain('driver: notion');
  expect(yaml).toContain('auth_token_ref: env:NOTION_AUTH_TOKEN');
  expect(yaml).toContain('crawl_mode: all_accessible');
  expect(yaml).toContain('max_pages_per_run: 5');
  expect(yaml).not.toContain('ntn_');

  const parsed = parseKtxProjectConfig(yaml);
  expect(parsed.connections['notion-main']).toMatchObject({
    driver: 'notion',
    auth_token_ref: 'env:NOTION_AUTH_TOKEN',
    crawl_mode: 'all_accessible',
  });
});
|
|
|
|
// Starts `ktx serve --mcp stdio` for a project with a postgres `warehouse` connection and
// checks the advertised tool list, `connection_list`, and the not-found responses of the
// ingest status/report/replay tools.
it('serves local ingest MCP tools over stdio from the built binary', async () => {
  const projectDir = join(tempDir, 'project');

  const init = await runSetupNewProject(projectDir);
  expect(init).toMatchObject({ code: 0, stderr: '' });
  await writeWarehouseConfig(projectDir);

  const client = new Client({ name: 'ktx-smoke-client', version: '0.0.0' });
  const transport = new StdioClientTransport({
    command: process.execPath,
    args: [CLI_BIN, 'serve', '--mcp', 'stdio', '--project-dir', projectDir, '--user-id', 'smoke-user'],
    stderr: 'pipe',
  });

  try {
    await client.connect(transport);
    const tools = await client.listTools();
    const toolNames = tools.tools.map((tool) => tool.name).sort();
    expect(toolNames).toEqual(
      expect.arrayContaining([
        'connection_list',
        'connection_test',
        'ingest_report',
        'ingest_replay',
        'ingest_status',
        'ingest_trigger',
        'knowledge_read',
        'knowledge_search',
        'knowledge_write',
        'scan_list_artifacts',
        'scan_read_artifact',
        'scan_report',
        'scan_status',
        'scan_trigger',
        'sl_list_sources',
        'sl_read_source',
        'sl_validate',
        'sl_write_source',
      ]),
    );

    const connections = structuredContent<{
      connections: Array<{ id: string; name: string; connectionType: string }>;
    }>(await client.callTool({ name: 'connection_list', arguments: {} }));
    expect(connections).toEqual({
      connections: [{ id: 'warehouse', name: 'warehouse', connectionType: 'POSTGRESQL' }],
    });

    await expect(client.callTool({ name: 'ingest_status', arguments: { runId: 'missing-run' } })).resolves.toEqual({
      content: [{ type: 'text', text: 'Ingest run "missing-run" was not found.' }],
      isError: true,
    });

    await expect(client.callTool({ name: 'ingest_report', arguments: { runId: 'missing-run' } })).resolves.toEqual({
      content: [{ type: 'text', text: 'Ingest report "missing-run" was not found.' }],
      isError: true,
    });

    await expect(client.callTool({ name: 'ingest_replay', arguments: { runId: 'missing-run' } })).resolves.toEqual({
      content: [{ type: 'text', text: 'Ingest replay "missing-run" was not found.' }],
      isError: true,
    });
  } finally {
    // Always close the client, whether or not an assertion failed.
    await client.close();
  }
});
|
|
|
|
// Starts `ktx serve --mcp stdio` for a project backed by a SQLite warehouse, triggers a
// structural scan through MCP, then reads back its status, artifact list, and manifest shard.
it('serves scan execution and artifact inspection tools over stdio from the built binary', async () => {
  const projectDir = join(tempDir, 'scan-mcp-project');
  const init = await runSetupNewProject(projectDir);
  expect(init).toMatchObject({ code: 0, stderr: '' });

  const dbPath = join(projectDir, 'warehouse.db');
  createSqliteWarehouse(dbPath);
  await writeSqliteScanConfig(projectDir, dbPath);

  const client = new Client({ name: 'ktx-scan-smoke-client', version: '0.0.0' });
  const transport = new StdioClientTransport({
    command: process.execPath,
    args: [CLI_BIN, 'serve', '--mcp', 'stdio', '--project-dir', projectDir, '--user-id', 'smoke-user'],
    stderr: 'pipe',
  });

  try {
    await client.connect(transport);

    const connectionTest = structuredContent<{
      id: string;
      connectionType: string;
      ok: boolean;
      tableCount: number | null;
    }>(await client.callTool({ name: 'connection_test', arguments: { connectionId: 'warehouse' } }));
    expect(connectionTest).toMatchObject({
      id: 'warehouse',
      connectionType: 'SQLITE',
      ok: true,
      tableCount: 2,
    });

    const trigger = structuredContent<{
      runId: string;
      status: 'done';
      done: true;
      connectionId: string;
      mode: string;
      dryRun: boolean;
      report: {
        artifactPaths: { manifestShards: string[] };
        manifestShardsWritten: number;
      };
    }>(
      await client.callTool({
        name: 'scan_trigger',
        arguments: {
          connectionId: 'warehouse',
          mode: 'structural',
          detectRelationships: false,
          dryRun: false,
        },
      }),
    );
    expect(trigger).toMatchObject({
      status: 'done',
      done: true,
      connectionId: 'warehouse',
      mode: 'structural',
      dryRun: false,
    });
    expect(trigger.report.artifactPaths.manifestShards).toEqual(['semantic-layer/warehouse/_schema/public.yaml']);
    expect(trigger.report.manifestShardsWritten).toBe(1);

    const status = structuredContent<{
      runId: string;
      status: string;
      done: boolean;
      reportPath: string | null;
    }>(await client.callTool({ name: 'scan_status', arguments: { runId: trigger.runId } }));
    expect(status).toMatchObject({
      runId: trigger.runId,
      status: 'done',
      done: true,
    });
    expect(status.reportPath).toContain('scan-report.json');

    const artifacts = structuredContent<{
      runId: string;
      artifacts: Array<{ path: string; type: string }>;
    }>(await client.callTool({ name: 'scan_list_artifacts', arguments: { runId: trigger.runId } }));
    expect(artifacts.artifacts).toEqual(
      expect.arrayContaining([
        expect.objectContaining({ path: 'semantic-layer/warehouse/_schema/public.yaml', type: 'manifest_shard' }),
        expect.objectContaining({ type: 'report' }),
        expect.objectContaining({ type: 'raw_source' }),
      ]),
    );

    const manifestArtifact = structuredContent<{
      runId: string;
      path: string;
      type: string;
      content: string;
    }>(
      await client.callTool({
        name: 'scan_read_artifact',
        arguments: {
          runId: trigger.runId,
          path: 'semantic-layer/warehouse/_schema/public.yaml',
        },
      }),
    );
    expect(manifestArtifact).toMatchObject({
      runId: trigger.runId,
      path: 'semantic-layer/warehouse/_schema/public.yaml',
      type: 'manifest_shard',
    });
    expect(manifestArtifact.content).toContain('orders:');
    expect(manifestArtifact.content).toContain('source: formal');
  } finally {
    // Always close the client, whether or not an assertion failed.
    await client.close();
  }
});
|
|
});
|