feat(mcp):added MCP server (#97)

* docs(specs): design research-agent MCP tools and ktx mcp daemon

Adds the 2026-05-14 design spec for exposing four new MCP tools
(discover_data, entity_details, dictionary_search, sql_execution),
shipping a ktx-research skill, and introducing an HTTP-only ktx mcp
daemon so external agents can use KTX as a research-capable context
layer.

* Refine research-agent MCP tools spec after adversarial review iteration 1

* Refine research-agent MCP tools spec after adversarial review iteration 2

* Refine research-agent MCP tools spec after adversarial review iteration 3

* Refine spec: drop connectionName compat carve-out and ground summary/snippet provenance per kind

* feat(daemon): validate read-only SQL with sqlglot

* feat(context): expose read-only SQL validation port

* feat(context): register MCP sql execution tool

* feat(context): execute MCP SQL through validated connector path

* test(context): update SQL analysis port fixtures

* docs: add research-agent MCP sql execution foundation plan

* feat(context): add scan-backed entity details service

* feat(context): register MCP entity details tool

* feat(context): expose local MCP entity details

* test(context): align entity details scan fixtures

* docs: add research-agent MCP entity_details plan

* feat(context): add dictionary search service

* feat(context): register MCP dictionary search tool

* feat(context): expose local MCP dictionary search

* docs: add research-agent MCP dictionary_search plan

* feat: add MCP discover data service

* feat: expose discover data MCP tool

* feat: wire local discover data MCP port

* docs: add research-agent MCP discover_data plan

* feat(cli): add mcp http security helpers

* feat(cli): host mcp over streamable http

* feat(cli): manage mcp daemon lifecycle

* feat(cli): add ktx mcp commands

* fix(cli): stabilize mcp daemon verification

* docs: add research-agent MCP http daemon plan

* feat(cli): install KTX research skill

* feat(cli): configure MCP clients in setup agents

* feat(cli): support Claude local MCP setup scope

* docs: add research-agent MCP setup-agents plan

* refactor(context): use connectionId in warehouse verification tools

* docs(context): update ingest verification prompts for connectionId

* docs: add research-agent MCP ingest contract convergence plan

* chore: build runtime artifacts in conductor setup

---------

Co-authored-by: Andrey Avtomonov <7889985+andreybavt@users.noreply.github.com>
This commit is contained in:
Andrey Avtomonov 2026-05-15 02:35:09 +02:00 committed by GitHub
parent c7b64379bf
commit b759a4a286
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
78 changed files with 13689 additions and 190 deletions

View file

@ -26,7 +26,7 @@
],
"scripts": {
"assets:demo": "node scripts/build-demo-assets.mjs",
"build": "node -e \"fs.rmSync('dist', { recursive: true, force: true })\" && tsc -p tsconfig.json && node ../../scripts/prepare-cli-bin.mjs",
"build": "node -e \"fs.rmSync('dist', { recursive: true, force: true })\" && tsc -p tsconfig.json && node scripts/copy-runtime-assets.mjs && node ../../scripts/prepare-cli-bin.mjs",
"docs:commands": "pnpm run build && node dist/print-command-tree.js",
"smoke": "vitest run src/standalone-smoke.test.ts src/example-smoke.test.ts --testTimeout 30000",
"test": "vitest run --exclude src/standalone-smoke.test.ts --exclude src/example-smoke.test.ts --exclude src/setup-databases.test.ts --exclude src/scan.test.ts --exclude src/commands/connection-metabase-setup.test.ts --exclude src/setup-models.test.ts --exclude src/setup-sources.test.ts --exclude src/setup.test.ts --exclude src/connection.test.ts --exclude src/setup-embeddings.test.ts --exclude src/ingest.test.ts --exclude src/commands/connection-mapping.test.ts --exclude src/ingest-viz.test.ts --exclude src/demo.test.ts --exclude src/setup-project.test.ts --exclude src/sl.test.ts --exclude src/local-scan-connectors.test.ts --exclude src/commands/connection-notion.test.ts",
@ -45,6 +45,7 @@
"@ktx/connector-sqlserver": "workspace:*",
"@ktx/context": "workspace:*",
"@ktx/llm": "workspace:*",
"@modelcontextprotocol/sdk": "^1.29.0",
"commander": "14.0.3",
"ink": "^7.0.2",
"react": "^19.2.6",

View file

@ -0,0 +1,11 @@
import { cp, mkdir, rm } from 'node:fs/promises';
import { dirname, join } from 'node:path';
import { fileURLToPath } from 'node:url';
const packageRoot = fileURLToPath(new URL('..', import.meta.url));
const skillsSource = join(packageRoot, 'src', 'skills');
const skillsTarget = join(packageRoot, 'dist', 'skills');
await rm(skillsTarget, { recursive: true, force: true });
await mkdir(dirname(skillsTarget), { recursive: true });
await cp(skillsSource, skillsTarget, { recursive: true });

View file

@ -5,6 +5,7 @@ import type { KtxCliDeps, KtxCliIo, KtxCliPackageInfo } from './cli-runtime.js';
import { registerConnectionCommands } from './commands/connection-commands.js';
import { registerIngestCommands } from './commands/ingest-commands.js';
import { registerWikiCommands } from './commands/knowledge-commands.js';
import { registerMcpCommands } from './commands/mcp-commands.js';
import { registerSetupCommands } from './commands/setup-commands.js';
import { registerSlCommands } from './commands/sl-commands.js';
import { registerStatusCommands } from './commands/status-commands.js';
@ -55,7 +56,7 @@ type CommandPathNode = CommandWithGlobalOptions & {
parent?: CommandPathNode | null;
};
const PROJECT_AWARE_ROOT_COMMANDS = new Set(['setup', 'connection', 'ingest', 'wiki', 'sl', 'status']);
const PROJECT_AWARE_ROOT_COMMANDS = new Set(['setup', 'connection', 'ingest', 'wiki', 'sl', 'status', 'mcp']);
const COMMANDS_THAT_CREATE_PROJECT = new Set(['setup', 'ktx dev init']);
const COMMANDS_WITH_OWN_MISSING_PROJECT_HANDLING = new Set(['status']);
const GLOBAL_OPTIONS_WITH_VALUE = new Set(['--project-dir']);
@ -412,6 +413,7 @@ export function buildKtxProgram(options: BuildKtxProgramOptions): Command {
registerWikiCommands(program, context);
registerSlCommands(program, context);
registerStatusCommands(program, context);
registerMcpCommands(program, context);
registerDevCommands(program, context);
return program;

View file

@ -34,6 +34,12 @@ export interface KtxCliDeps {
runtime?: (args: KtxRuntimeArgs, io: KtxCliIo) => Promise<number>;
knowledge?: (args: KtxKnowledgeArgs, io: KtxCliIo) => Promise<number>;
sl?: (args: KtxSlArgs, io: KtxCliIo) => Promise<number>;
mcp?: {
startDaemon?: typeof import('./managed-mcp-daemon.js').startKtxMcpDaemon;
stopDaemon?: typeof import('./managed-mcp-daemon.js').stopKtxMcpDaemon;
readStatus?: typeof import('./managed-mcp-daemon.js').readKtxMcpDaemonStatus;
runServer?: typeof import('./mcp-http-server.js').runKtxMcpHttpServer;
};
}
export function getKtxCliPackageInfo(): KtxCliPackageInfo {

View file

@ -0,0 +1,57 @@
import { Command } from '@commander-js/extra-typings';
import { describe, expect, it, vi } from 'vitest';
import type { KtxCliCommandContext } from '../cli-program.js';
import { registerMcpCommands } from './mcp-commands.js';
function makeContext(overrides: Partial<KtxCliCommandContext> = {}): KtxCliCommandContext {
let exitCode = 0;
return {
io: {
stdout: { write: vi.fn() },
stderr: { write: vi.fn() },
},
deps: {},
packageInfo: { name: '@ktx/cli', version: '0.0.0-test', contextPackageName: '@ktx/context' },
setExitCode: (code) => {
exitCode = code;
},
runInit: vi.fn(),
writeDebug: vi.fn(),
...overrides,
get exitCode() {
return exitCode;
},
} as KtxCliCommandContext;
}
describe('registerMcpCommands', () => {
it('registers the public mcp lifecycle commands', () => {
const program = new Command().exitOverride();
registerMcpCommands(program, makeContext());
const mcp = program.commands.find((command) => command.name() === 'mcp');
expect(mcp?.commands.map((command) => command.name()).sort()).toEqual([
'logs',
'serve-internal',
'start',
'status',
'stop',
]);
expect(
(mcp?.commands.find((command) => command.name() === 'serve-internal') as { _hidden?: boolean } | undefined)
?._hidden,
).toBe(true);
});
it('rejects non-loopback start without token before spawning', async () => {
const program = new Command().exitOverride();
const startDaemon = vi.fn();
const context = makeContext({ deps: { mcp: { startDaemon } } });
registerMcpCommands(program, context);
await expect(program.parseAsync(['mcp', 'start', '--host', '0.0.0.0'], { from: 'user' })).rejects.toThrow(
'Binding KTX MCP to 0.0.0.0 requires --token or KTX_MCP_TOKEN',
);
expect(startDaemon).not.toHaveBeenCalled();
});
});

View file

@ -0,0 +1,136 @@
import { spawn } from 'node:child_process';
import { readFile } from 'node:fs/promises';
import { fileURLToPath } from 'node:url';
import { Command } from '@commander-js/extra-typings';
import type { KtxCliCommandContext } from '../cli-program.js';
import {
collectOption,
parsePositiveIntegerOption,
resolveCommandProjectDir,
} from '../cli-program.js';
import {
mcpDaemonLayout,
readKtxMcpDaemonStatus,
startKtxMcpDaemon,
stopKtxMcpDaemon,
} from '../managed-mcp-daemon.js';
import { buildMcpSecurityConfig, runKtxMcpHttpServer } from '../mcp-http-server.js';
function tokenFromOption(value: string | undefined): string | undefined {
return value ?? process.env.KTX_MCP_TOKEN;
}
function binPath(): string {
return fileURLToPath(new URL('../bin.js', import.meta.url));
}
export function registerMcpCommands(program: Command, context: KtxCliCommandContext): void {
const mcp = program.command('mcp').description('Run the KTX MCP HTTP server');
mcp
.command('start')
.description('Start the KTX MCP HTTP server')
.option('--host <host>', 'Host to bind', '127.0.0.1')
.option('--port <n>', 'Port to bind', parsePositiveIntegerOption, 7878)
.option('--token <token>', 'Bearer token required for non-loopback binding')
.option('--foreground', 'Run in the foreground', false)
.option('--allowed-host <host>', 'Additional allowed Host header', collectOption, [])
.option('--allowed-origin <origin>', 'Allowed browser Origin header', collectOption, [])
.action(async (options, command) => {
const projectDir = resolveCommandProjectDir(command);
const token = tokenFromOption(options.token);
buildMcpSecurityConfig({
host: options.host,
port: options.port,
token,
allowedHosts: options.allowedHost,
allowedOrigins: options.allowedOrigin,
});
if (options.foreground) {
await (context.deps.mcp?.runServer ?? runKtxMcpHttpServer)({
projectDir,
cliVersion: context.packageInfo.version,
host: options.host,
port: options.port,
token,
allowedHosts: options.allowedHost,
allowedOrigins: options.allowedOrigin,
io: context.io,
});
context.io.stdout.write(`KTX MCP server listening at http://${options.host}:${options.port}/mcp\n`);
return;
}
const result = await (context.deps.mcp?.startDaemon ?? startKtxMcpDaemon)({
projectDir,
cliVersion: context.packageInfo.version,
host: options.host,
port: options.port,
token,
allowedHosts: options.allowedHost,
allowedOrigins: options.allowedOrigin,
binPath: binPath(),
});
context.io.stdout.write(`KTX MCP daemon started: ${result.url}\n`);
});
mcp
.command('stop')
.description('Stop the KTX MCP daemon')
.action(async (_options, command) => {
const result = await (context.deps.mcp?.stopDaemon ?? stopKtxMcpDaemon)({
projectDir: resolveCommandProjectDir(command),
});
context.io.stdout.write(result.status === 'stopped' ? 'KTX MCP daemon stopped.\n' : 'KTX MCP daemon is not running.\n');
});
mcp
.command('status')
.description('Show KTX MCP daemon status')
.action(async (_options, command) => {
const status = await (context.deps.mcp?.readStatus ?? readKtxMcpDaemonStatus)({
projectDir: resolveCommandProjectDir(command),
});
context.io.stdout.write(`${status.detail}\n`);
if (status.kind === 'running') {
context.io.stdout.write(`URL: ${status.url}\n`);
context.io.stdout.write(`PID: ${status.state.pid}\n`);
context.io.stdout.write(`Token auth: ${status.state.tokenAuth ? 'enabled' : 'disabled'}\n`);
context.io.stdout.write(`Project: ${status.state.projectDir}\n`);
}
});
mcp
.command('logs')
.description('Print the KTX MCP daemon log')
.option('--follow', 'Follow log output', false)
.action(async (options, command) => {
const logPath = mcpDaemonLayout(resolveCommandProjectDir(command)).logPath;
if (options.follow) {
const child = spawn('tail', ['-f', logPath], { stdio: ['ignore', 'pipe', 'pipe'] });
child.stdout?.on('data', (chunk: Buffer) => context.io.stdout.write(chunk.toString('utf8')));
child.stderr?.on('data', (chunk: Buffer) => context.io.stderr.write(chunk.toString('utf8')));
await new Promise((resolve) => child.on('close', resolve));
return;
}
context.io.stdout.write(await readFile(logPath, 'utf8'));
});
mcp
.command('serve-internal', { hidden: true })
.option('--host <host>', 'Host to bind', '127.0.0.1')
.requiredOption('--port <n>', 'Port to bind', parsePositiveIntegerOption)
.option('--allowed-host <host>', 'Additional allowed Host header', collectOption, [])
.option('--allowed-origin <origin>', 'Allowed browser Origin header', collectOption, [])
.action(async (options, command) => {
await (context.deps.mcp?.runServer ?? runKtxMcpHttpServer)({
projectDir: resolveCommandProjectDir(command),
cliVersion: context.packageInfo.version,
host: options.host,
port: options.port,
token: process.env.KTX_MCP_TOKEN,
allowedHosts: options.allowedHost,
allowedOrigins: options.allowedOrigin,
io: context.io,
});
});
}

View file

@ -90,6 +90,7 @@ function shouldShowSetupEntryMenu(
agents?: boolean;
target?: string;
global?: boolean;
local?: boolean;
skipAgents?: boolean;
yes?: boolean;
input?: boolean;
@ -163,6 +164,7 @@ function shouldShowSetupEntryMenu(
'agents',
'target',
'global',
'local',
'skipAgents',
'yes',
'input',
@ -223,6 +225,7 @@ export function registerSetupCommands(program: Command, context: KtxCliCommandCo
]),
)
.option('--global', 'Install agent integration into the global target scope', false)
.option('--local', 'Install Claude Code MCP config into the private per-project ~/.claude.json scope', false)
.addOption(new Option('--skip-agents', 'Leave agent integration incomplete for now').hideHelp().default(false))
.option('--yes', 'Accept safe defaults in non-interactive setup', false)
.option('--no-input', 'Disable interactive terminal input')
@ -392,9 +395,19 @@ export function registerSetupCommands(program: Command, context: KtxCliCommandCo
context.setExitCode(1);
return;
}
if (options.local && options.global) {
context.io.stderr.write('Choose only one agent scope: --local or --global.\n');
context.setExitCode(1);
return;
}
if (options.local && options.target && options.target !== 'claude-code') {
context.io.stderr.write('--local is only supported with --target claude-code.\n');
context.setExitCode(1);
return;
}
const mode = options.new ? 'new' : options.existing ? 'existing' : 'auto';
const resolvedAgentScope = options.global ? 'global' : 'project';
const resolvedAgentScope = options.local ? 'local' : options.global ? 'global' : 'project';
await runSetupArgs(context, {
command: 'run',
projectDir: resolveCommandProjectDir(command),

View file

@ -440,6 +440,7 @@ describe('runKtxCli', () => {
expect(stdout).toContain('--agents');
expect(stdout).toContain('--target <target>');
expect(stdout).toContain('--global');
expect(stdout).toContain('--local');
expect(stdout).toContain('--yes');
expect(stdout).toContain('--no-input');
expect(stdout).toContain('Global Options:');
@ -1286,6 +1287,38 @@ describe('runKtxCli', () => {
);
});
it('rejects --local with non-Claude targets', async () => {
const setup = vi.fn(async () => 0);
const setupIo = makeIo();
await expect(
runKtxCli(
['--project-dir', tempDir, 'setup', '--agents', '--target', 'cursor', '--local', '--no-input'],
setupIo.io,
{ setup },
),
).resolves.toBe(1);
expect(setupIo.stderr()).toContain('--local is only supported with --target claude-code');
expect(setup).not.toHaveBeenCalled();
});
it('rejects --local and --global together', async () => {
const setup = vi.fn(async () => 0);
const setupIo = makeIo();
await expect(
runKtxCli(
['--project-dir', tempDir, 'setup', '--agents', '--target', 'claude-code', '--local', '--global', '--no-input'],
setupIo.io,
{ setup },
),
).resolves.toBe(1);
expect(setupIo.stderr()).toContain('Choose only one agent scope: --local or --global.');
expect(setup).not.toHaveBeenCalled();
});
it('rejects source-path with source-git-url', async () => {
const setup = vi.fn(async () => 0);
const testIo = makeIo();

View file

@ -18,6 +18,9 @@ function sqlAnalysisStub() {
async analyzeBatch() {
return new Map();
},
async validateReadOnly() {
return { ok: true };
},
};
}

View file

@ -0,0 +1,133 @@
import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import {
mcpDaemonLayout,
readKtxMcpDaemonStatus,
startKtxMcpDaemon,
stopKtxMcpDaemon,
type KtxMcpDaemonChild,
type KtxMcpDaemonState,
} from './managed-mcp-daemon.js';
function child(pid = 4242): KtxMcpDaemonChild {
return { pid, unref: vi.fn() };
}
function state(projectDir: string, overrides: Partial<KtxMcpDaemonState> = {}): KtxMcpDaemonState {
return {
schemaVersion: 1,
pid: 4242,
host: '127.0.0.1',
port: 7878,
tokenAuth: false,
projectDir,
startedAt: '2026-05-14T00:00:00.000Z',
logPath: join(projectDir, '.ktx/logs/mcp.log'),
...overrides,
};
}
describe('managed MCP daemon lifecycle', () => {
let tempDir: string;
let projectDir: string;
beforeEach(async () => {
tempDir = await mkdtemp(join(tmpdir(), 'ktx-mcp-daemon-'));
projectDir = join(tempDir, 'project');
await mkdir(projectDir, { recursive: true });
});
afterEach(async () => {
await rm(tempDir, { recursive: true, force: true });
});
it('uses the spec state and log paths', () => {
expect(mcpDaemonLayout(projectDir)).toEqual({
statePath: join(projectDir, '.ktx/mcp.json'),
logPath: join(projectDir, '.ktx/logs/mcp.log'),
});
});
it('starts a detached child and writes state without the token value', async () => {
const spawnDaemon = vi.fn(() => child(5555));
await startKtxMcpDaemon({
projectDir,
cliVersion: '0.0.0-test',
host: '0.0.0.0',
port: 7879,
token: 'secret-token',
allowedHosts: ['mcp.example.test'],
allowedOrigins: ['https://mcp.example.test'],
binPath: '/repo/packages/cli/dist/bin.js',
spawnDaemon,
processAlive: vi.fn(() => false),
portAvailable: vi.fn(async () => true),
now: () => new Date('2026-05-14T00:00:00.000Z'),
});
expect(spawnDaemon).toHaveBeenCalledWith(
process.execPath,
[
'/repo/packages/cli/dist/bin.js',
'--project-dir',
projectDir,
'mcp',
'serve-internal',
'--host',
'0.0.0.0',
'--port',
'7879',
'--allowed-host',
'mcp.example.test',
'--allowed-origin',
'https://mcp.example.test',
],
expect.objectContaining({
detached: true,
env: expect.objectContaining({ KTX_MCP_TOKEN: 'secret-token' }),
}),
);
expect(JSON.stringify(JSON.parse(await readFile(join(projectDir, '.ktx/mcp.json'), 'utf8')))).not.toContain(
'secret-token',
);
});
it('reports running when the process is alive and health passes', async () => {
await mkdir(join(projectDir, '.ktx'), { recursive: true });
await writeFile(join(projectDir, '.ktx/mcp.json'), `${JSON.stringify(state(projectDir), null, 2)}\n`);
const status = await readKtxMcpDaemonStatus({
projectDir,
processAlive: vi.fn(() => true),
fetchHealth: vi.fn(async () => ({ ok: true, body: { status: 'ok', projectDir, port: 7878 } })),
});
expect(status.kind).toBe('running');
if (status.kind !== 'running') {
throw new Error(`Expected running status, received ${status.kind}`);
}
expect(status.url).toBe('http://127.0.0.1:7878/mcp');
});
it('stops a recorded daemon and removes state', async () => {
await mkdir(join(projectDir, '.ktx'), { recursive: true });
await writeFile(join(projectDir, '.ktx/mcp.json'), `${JSON.stringify(state(projectDir), null, 2)}\n`);
const alive = new Set([4242]);
const killProcess = vi.fn((pid: number) => alive.delete(pid));
await expect(
stopKtxMcpDaemon({
projectDir,
processAlive: vi.fn((pid) => alive.has(pid)),
killProcess,
stopGraceMs: 1,
pollIntervalMs: 1,
}),
).resolves.toEqual({ status: 'stopped' });
expect(killProcess).toHaveBeenCalledWith(4242, 'SIGTERM');
await expect(readFile(join(projectDir, '.ktx/mcp.json'), 'utf8')).rejects.toThrow();
});
});

View file

@ -0,0 +1,238 @@
import { spawn } from 'node:child_process';
import { mkdir, open, readFile, rm, writeFile } from 'node:fs/promises';
import { createServer } from 'node:net';
import { dirname, join } from 'node:path';
import { setTimeout as delay } from 'node:timers/promises';
import { z } from 'zod';
export interface KtxMcpDaemonState {
schemaVersion: 1;
pid: number;
host: string;
port: number;
tokenAuth: boolean;
projectDir: string;
startedAt: string;
logPath: string;
}
export interface KtxMcpDaemonChild {
pid?: number;
unref(): void;
}
export type KtxMcpDaemonStatus =
| { kind: 'stopped'; detail: string }
| { kind: 'running'; detail: string; state: KtxMcpDaemonState; url: string }
| { kind: 'stale'; detail: string; state?: KtxMcpDaemonState };
const stateSchema = z.object({
schemaVersion: z.literal(1),
pid: z.number().int().positive(),
host: z.string().min(1),
port: z.number().int().min(1).max(65535),
tokenAuth: z.boolean(),
projectDir: z.string().min(1),
startedAt: z.string().min(1),
logPath: z.string().min(1),
});
export function mcpDaemonLayout(projectDir: string): { statePath: string; logPath: string } {
return {
statePath: join(projectDir, '.ktx/mcp.json'),
logPath: join(projectDir, '.ktx/logs/mcp.log'),
};
}
function defaultProcessAlive(pid: number): boolean {
try {
process.kill(pid, 0);
return true;
} catch {
return false;
}
}
function defaultKillProcess(pid: number, signal: NodeJS.Signals): void {
try {
process.kill(pid, signal);
} catch (error) {
if ((error as { code?: unknown }).code !== 'ESRCH') {
throw error;
}
}
}
async function readState(projectDir: string): Promise<KtxMcpDaemonState | undefined> {
try {
return stateSchema.parse(JSON.parse(await readFile(mcpDaemonLayout(projectDir).statePath, 'utf8')) as unknown);
} catch (error) {
if ((error as { code?: unknown }).code === 'ENOENT') {
return undefined;
}
throw error;
}
}
async function writeState(projectDir: string, state: KtxMcpDaemonState): Promise<void> {
const { statePath } = mcpDaemonLayout(projectDir);
await mkdir(dirname(statePath), { recursive: true });
await writeFile(statePath, `${JSON.stringify(state, null, 2)}\n`, 'utf8');
}
async function defaultPortAvailable(host: string, port: number): Promise<boolean> {
return await new Promise((resolve) => {
const server = createServer();
server.once('error', () => resolve(false));
server.listen(port, host, () => server.close(() => resolve(true)));
});
}
function defaultSpawnDaemon(
command: string,
args: string[],
options: { detached: boolean; stdio: ['ignore', number, number]; env: NodeJS.ProcessEnv },
): KtxMcpDaemonChild {
return spawn(command, args, options);
}
async function defaultFetchHealth(state: KtxMcpDaemonState): Promise<{ ok: boolean; body: unknown; detail?: string }> {
try {
const response = await fetch(`http://${state.host}:${state.port}/health`, {
headers: { host: `${state.host}:${state.port}` },
});
const body = await response.json();
return { ok: response.ok, body, detail: response.ok ? undefined : `HTTP ${response.status}` };
} catch (error) {
return { ok: false, body: null, detail: error instanceof Error ? error.message : String(error) };
}
}
export async function startKtxMcpDaemon(options: {
projectDir: string;
cliVersion: string;
host: string;
port: number;
token?: string;
allowedHosts: string[];
allowedOrigins: string[];
binPath: string;
processAlive?: (pid: number) => boolean;
portAvailable?: (host: string, port: number) => Promise<boolean>;
spawnDaemon?: typeof defaultSpawnDaemon;
now?: () => Date;
}): Promise<{ status: 'started'; state: KtxMcpDaemonState; url: string }> {
const existing = await readState(options.projectDir).catch(() => undefined);
const processAlive = options.processAlive ?? defaultProcessAlive;
if (existing && processAlive(existing.pid)) {
throw new Error(`KTX MCP daemon is already recorded at http://${existing.host}:${existing.port}/mcp`);
}
const portAvailable = options.portAvailable ?? defaultPortAvailable;
if (!(await portAvailable(options.host, options.port))) {
throw new Error(`Port ${options.port} is already in use. Choose another port with --port <n>.`);
}
const { logPath } = mcpDaemonLayout(options.projectDir);
await mkdir(dirname(logPath), { recursive: true });
const log = await open(logPath, 'a');
try {
const args = [
options.binPath,
'--project-dir',
options.projectDir,
'mcp',
'serve-internal',
'--host',
options.host,
'--port',
String(options.port),
...options.allowedHosts.flatMap((host) => ['--allowed-host', host]),
...options.allowedOrigins.flatMap((origin) => ['--allowed-origin', origin]),
];
const child = (options.spawnDaemon ?? defaultSpawnDaemon)(process.execPath, args, {
detached: true,
stdio: ['ignore', log.fd, log.fd],
env: {
...process.env,
KTX_CLI_VERSION: options.cliVersion,
...(options.token ? { KTX_MCP_TOKEN: options.token } : {}),
},
});
if (!child.pid) {
throw new Error('Failed to start KTX MCP daemon: child process pid was not available.');
}
child.unref();
const state: KtxMcpDaemonState = {
schemaVersion: 1,
pid: child.pid,
host: options.host,
port: options.port,
tokenAuth: Boolean(options.token),
projectDir: options.projectDir,
startedAt: (options.now ?? (() => new Date()))().toISOString(),
logPath,
};
await writeState(options.projectDir, state);
return { status: 'started', state, url: `http://${state.host}:${state.port}/mcp` };
} finally {
await log.close();
}
}
export async function readKtxMcpDaemonStatus(options: {
projectDir: string;
processAlive?: (pid: number) => boolean;
fetchHealth?: (state: KtxMcpDaemonState) => Promise<{ ok: boolean; body: unknown; detail?: string }>;
}): Promise<KtxMcpDaemonStatus> {
let state: KtxMcpDaemonState | undefined;
try {
state = await readState(options.projectDir);
} catch (error) {
return { kind: 'stale', detail: `MCP daemon state is invalid: ${error instanceof Error ? error.message : String(error)}` };
}
if (!state) {
return { kind: 'stopped', detail: `No MCP daemon state at ${mcpDaemonLayout(options.projectDir).statePath}` };
}
const processAlive = options.processAlive ?? defaultProcessAlive;
if (!processAlive(state.pid)) {
return { kind: 'stale', detail: `MCP daemon process ${state.pid} is not running`, state };
}
const health = await (options.fetchHealth ?? defaultFetchHealth)(state);
if (!health.ok) {
return { kind: 'stale', detail: health.detail ?? 'MCP daemon health check failed', state };
}
return {
kind: 'running',
detail: `KTX MCP daemon running at http://${state.host}:${state.port}/mcp`,
state,
url: `http://${state.host}:${state.port}/mcp`,
};
}
export async function stopKtxMcpDaemon(options: {
projectDir: string;
processAlive?: (pid: number) => boolean;
killProcess?: (pid: number, signal: NodeJS.Signals) => void;
stopGraceMs?: number;
pollIntervalMs?: number;
}): Promise<{ status: 'stopped' | 'already-stopped' }> {
const state = await readState(options.projectDir);
const { statePath } = mcpDaemonLayout(options.projectDir);
if (!state) {
return { status: 'already-stopped' };
}
const processAlive = options.processAlive ?? defaultProcessAlive;
const killProcess = options.killProcess ?? defaultKillProcess;
if (processAlive(state.pid)) {
killProcess(state.pid, 'SIGTERM');
const deadline = Date.now() + (options.stopGraceMs ?? 10_000);
while (Date.now() <= deadline && processAlive(state.pid)) {
await delay(options.pollIntervalMs ?? 100);
}
if (processAlive(state.pid)) {
killProcess(state.pid, 'SIGKILL');
}
}
await rm(statePath, { force: true });
return { status: 'stopped' };
}

View file

@ -0,0 +1,274 @@
import { request } from 'node:http';
import type { AddressInfo } from 'node:net';
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
import { describe, expect, it } from 'vitest';
import {
buildMcpSecurityConfig,
isMcpRequestAuthorized,
normalizeHostHeader,
runKtxMcpHttpServer,
} from './mcp-http-server.js';
describe('normalizeHostHeader', () => {
it('normalizes host headers before allow-list comparison', () => {
expect(normalizeHostHeader('LOCALHOST:7878')).toBe('localhost');
expect(normalizeHostHeader('127.0.0.1:7878')).toBe('127.0.0.1');
expect(normalizeHostHeader('[::1]:7878')).toBe('::1');
expect(normalizeHostHeader(' Example.COM ')).toBe('example.com');
});
});
describe('buildMcpSecurityConfig', () => {
it('allows loopback hosts without a token', () => {
const config = buildMcpSecurityConfig({
host: '127.0.0.1',
port: 7878,
token: undefined,
allowedHosts: [],
allowedOrigins: [],
});
expect(config.token).toBeUndefined();
expect(config.allowedHosts).toEqual(['localhost', '127.0.0.1', '::1']);
});
it('requires a token for non-loopback binding', () => {
expect(() =>
buildMcpSecurityConfig({
host: '0.0.0.0',
port: 7878,
token: undefined,
allowedHosts: [],
allowedOrigins: [],
}),
).toThrow('Binding KTX MCP to 0.0.0.0 requires --token or KTX_MCP_TOKEN');
});
it('validates allowed origins as full origins', () => {
expect(() =>
buildMcpSecurityConfig({
host: '127.0.0.1',
port: 7878,
token: undefined,
allowedHosts: [],
allowedOrigins: ['localhost:7878'],
}),
).toThrow('Allowed origin must be a full origin URL');
});
});
describe('isMcpRequestAuthorized', () => {
const config = buildMcpSecurityConfig({
host: '0.0.0.0',
port: 7878,
token: 'secret-token',
allowedHosts: ['mcp.example.test'],
allowedOrigins: ['https://mcp.example.test'],
});
it('accepts a valid host, origin, and bearer token', () => {
expect(
isMcpRequestAuthorized(
{
path: '/mcp',
headers: {
host: 'mcp.example.test:7878',
origin: 'https://mcp.example.test',
authorization: 'Bearer secret-token',
},
},
config,
),
).toEqual({ ok: true });
});
it('rejects bad host headers before MCP handling', () => {
expect(
isMcpRequestAuthorized(
{ path: '/health', headers: { host: 'evil.example.test' } },
config,
),
).toEqual({ ok: false, status: 403, message: 'Host header is not allowed for KTX MCP.' });
});
it('rejects browser origins unless explicitly allowed', () => {
expect(
isMcpRequestAuthorized(
{
path: '/health',
headers: { host: 'mcp.example.test', origin: 'https://evil.example.test' },
},
config,
),
).toEqual({ ok: false, status: 403, message: 'Origin header is not allowed for KTX MCP.' });
});
it('requires bearer auth on /mcp when token auth is enabled', () => {
expect(
isMcpRequestAuthorized(
{ path: '/mcp', headers: { host: 'mcp.example.test', authorization: 'Bearer wrong' } },
config,
),
).toEqual({ ok: false, status: 401, message: 'Missing or invalid KTX MCP bearer token.' });
});
it('does not require bearer auth on /health', () => {
expect(isMcpRequestAuthorized({ path: '/health', headers: { host: 'mcp.example.test' } }, config)).toEqual({
ok: true,
});
});
});
function postJson(port: number, path: string, body: unknown, headers: Record<string, string> = {}) {
return new Promise<{ status: number; headers: Record<string, string | string[] | undefined>; body: string }>(
(resolve, reject) => {
const payload = JSON.stringify(body);
const req = request(
{
host: '127.0.0.1',
port,
path,
method: 'POST',
headers: {
host: `127.0.0.1:${port}`,
accept: 'application/json, text/event-stream',
'content-type': 'application/json',
'content-length': Buffer.byteLength(payload),
...headers,
},
},
(res) => {
const chunks: Buffer[] = [];
res.on('data', (chunk: Buffer) => chunks.push(chunk));
res.on('end', () =>
resolve({
status: res.statusCode ?? 0,
headers: res.headers,
body: Buffer.concat(chunks).toString('utf8'),
}),
);
},
);
req.on('error', reject);
req.end(payload);
},
);
}
function get(port: number, path: string, headers: Record<string, string> = {}) {
return new Promise<{ status: number; headers: Record<string, string | string[] | undefined>; body: string }>(
(resolve, reject) => {
const req = request(
{
host: '127.0.0.1',
port,
path,
method: 'GET',
headers: { host: `127.0.0.1:${port}`, ...headers },
},
(res) => {
const chunks: Buffer[] = [];
res.on('data', (chunk: Buffer) => chunks.push(chunk));
res.on('end', () =>
resolve({
status: res.statusCode ?? 0,
headers: res.headers,
body: Buffer.concat(chunks).toString('utf8'),
}),
);
},
);
req.on('error', reject);
req.end();
},
);
}
function createTestMcpServer() {
return () => {
const server = new McpServer({ name: 'ktx-test', version: '0.0.0-test' });
server.registerTool('ping', { inputSchema: {} }, async () => ({
content: [{ type: 'text', text: 'pong' }],
}));
return server;
};
}
describe('runKtxMcpHttpServer', () => {
it('serves /health with project metadata', async () => {
const handle = await runKtxMcpHttpServer({
projectDir: '/tmp/ktx-project',
host: '127.0.0.1',
port: 0,
allowedHosts: [],
allowedOrigins: [],
createMcpServer: createTestMcpServer(),
});
try {
const port = (handle.server.address() as AddressInfo).port;
const response = await get(port, '/health');
expect(response.status).toBe(200);
expect(JSON.parse(response.body)).toEqual({
status: 'ok',
projectDir: '/tmp/ktx-project',
port,
});
} finally {
await handle.close();
}
});
it('allocates a stateful MCP session on initialize', async () => {
const handle = await runKtxMcpHttpServer({
projectDir: '/tmp/ktx-project',
host: '127.0.0.1',
port: 0,
allowedHosts: [],
allowedOrigins: [],
createMcpServer: createTestMcpServer(),
});
try {
const port = (handle.server.address() as AddressInfo).port;
const response = await postJson(port, '/mcp', {
jsonrpc: '2.0',
id: 1,
method: 'initialize',
params: {
protocolVersion: '2025-06-18',
capabilities: {},
clientInfo: { name: 'vitest', version: '0.0.0' },
},
});
expect(response.status).toBe(200);
expect(response.headers['mcp-session-id']).toBeTruthy();
} finally {
await handle.close();
}
});
it('rejects unknown session ids with 404', async () => {
const handle = await runKtxMcpHttpServer({
projectDir: '/tmp/ktx-project',
host: '127.0.0.1',
port: 0,
allowedHosts: [],
allowedOrigins: [],
createMcpServer: createTestMcpServer(),
});
try {
const port = (handle.server.address() as AddressInfo).port;
const response = await postJson(
port,
'/mcp',
{ jsonrpc: '2.0', id: 2, method: 'tools/list', params: {} },
{ 'mcp-session-id': 'missing-session' },
);
expect(response.status).toBe(404);
expect(response.body).toContain('Unknown MCP session');
} finally {
await handle.close();
}
});
});

View file

@ -0,0 +1,340 @@
import { randomUUID } from 'node:crypto';
import { createServer, type IncomingHttpHeaders, type IncomingMessage, type Server, type ServerResponse } from 'node:http';
import { createDefaultKtxMcpServer, createLocalProjectMcpContextPorts } from '@ktx/context/mcp';
import { createLocalProjectMemoryCapture } from '@ktx/context/memory';
import { loadKtxProject, type KtxLocalProject } from '@ktx/context/project';
import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
import { StreamableHTTPServerTransport } from '@modelcontextprotocol/sdk/server/streamableHttp.js';
import { isInitializeRequest } from '@modelcontextprotocol/sdk/types.js';
import type { KtxCliIo } from './cli-runtime.js';
import { createKtxCliIngestQueryExecutor } from './ingest-query-executor.js';
import { createKtxCliScanConnector } from './local-scan-connectors.js';
import { createManagedPythonSemanticLayerComputePort } from './managed-python-command.js';
import { createManagedDaemonSqlAnalysisPort } from './managed-python-http.js';
const DEFAULT_ALLOWED_HOSTS = ['localhost', '127.0.0.1', '::1'] as const;
export interface McpSecurityConfigInput {
host: string;
port: number;
token?: string;
allowedHosts: string[];
allowedOrigins: string[];
}
export interface McpSecurityConfig {
host: string;
port: number;
token?: string;
allowedHosts: string[];
allowedOrigins: string[];
}
export type McpAuthorizationResult =
| { ok: true }
| { ok: false; status: 401 | 403; message: string };
function isLoopbackHost(host: string): boolean {
const normalized = normalizeHostHeader(host);
return normalized === 'localhost' || normalized === '127.0.0.1' || normalized === '::1';
}
export function normalizeHostHeader(value: string): string {
const trimmed = value.trim().toLowerCase();
if (trimmed.startsWith('[')) {
const close = trimmed.indexOf(']');
return close >= 0 ? trimmed.slice(1, close) : trimmed.replace(/^\[/, '');
}
const colon = trimmed.lastIndexOf(':');
if (colon > -1 && trimmed.indexOf(':') === colon) {
return trimmed.slice(0, colon);
}
return trimmed;
}
function fullOrigin(value: string): string {
let parsed: URL;
try {
parsed = new URL(value);
} catch {
throw new Error(`Allowed origin must be a full origin URL: ${value}`);
}
if (!parsed.protocol || !parsed.host || parsed.pathname !== '/' || parsed.search || parsed.hash) {
throw new Error(`Allowed origin must be a full origin URL: ${value}`);
}
return parsed.origin;
}
export function buildMcpSecurityConfig(input: McpSecurityConfigInput): McpSecurityConfig {
if (!isLoopbackHost(input.host) && !input.token) {
throw new Error(`Binding KTX MCP to ${input.host} requires --token or KTX_MCP_TOKEN`);
}
const allowedHostSet = new Set<string>(DEFAULT_ALLOWED_HOSTS);
if (!isLoopbackHost(input.host)) {
allowedHostSet.add(normalizeHostHeader(input.host));
}
for (const host of input.allowedHosts) {
allowedHostSet.add(normalizeHostHeader(host));
}
return {
host: input.host,
port: input.port,
...(input.token ? { token: input.token } : {}),
allowedHosts: [...allowedHostSet],
allowedOrigins: input.allowedOrigins.map(fullOrigin),
};
}
function headerValue(headers: IncomingHttpHeaders | Record<string, string | undefined>, name: string): string | undefined {
const value = headers[name.toLowerCase()];
return Array.isArray(value) ? value[0] : value;
}
export function isMcpRequestAuthorized(
request: { path: string; headers: IncomingHttpHeaders | Record<string, string | undefined> },
config: McpSecurityConfig,
): McpAuthorizationResult {
const host = headerValue(request.headers, 'host');
if (!host || !config.allowedHosts.includes(normalizeHostHeader(host))) {
return { ok: false, status: 403, message: 'Host header is not allowed for KTX MCP.' };
}
const origin = headerValue(request.headers, 'origin');
if (origin && !config.allowedOrigins.includes(origin)) {
return { ok: false, status: 403, message: 'Origin header is not allowed for KTX MCP.' };
}
if (request.path === '/mcp' && config.token) {
const auth = headerValue(request.headers, 'authorization');
if (auth !== `Bearer ${config.token}`) {
return { ok: false, status: 401, message: 'Missing or invalid KTX MCP bearer token.' };
}
}
return { ok: true };
}
export interface KtxMcpHttpServerHandle {
server: Server;
close(): Promise<void>;
}
export interface RunKtxMcpHttpServerOptions extends McpSecurityConfigInput {
projectDir: string;
cliVersion?: string;
io?: KtxCliIo;
createMcpServer?: () => McpServer;
loadProject?: typeof loadKtxProject;
}
function noopIo(): KtxCliIo {
return {
stdout: { write() {} },
stderr: { write() {} },
};
}
function writeJson(res: ServerResponse, status: number, body: object): void {
const payload = `${JSON.stringify(body)}\n`;
res.writeHead(status, {
'content-type': 'application/json',
'content-length': Buffer.byteLength(payload),
});
res.end(payload);
}
function writeText(res: ServerResponse, status: number, body: string): void {
res.writeHead(status, { 'content-type': 'text/plain; charset=utf-8' });
res.end(body);
}
function requestPath(req: IncomingMessage): string {
const url = new URL(req.url ?? '/', 'http://127.0.0.1');
return url.pathname;
}
async function readJsonBody(req: IncomingMessage): Promise<unknown> {
const chunks: Buffer[] = [];
for await (const chunk of req) {
chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
}
const raw = Buffer.concat(chunks).toString('utf8');
return raw.trim().length === 0 ? undefined : (JSON.parse(raw) as unknown);
}
async function defaultMcpServerFactory(input: {
project: KtxLocalProject;
projectDir: string;
cliVersion: string;
io?: KtxCliIo;
}): Promise<() => McpServer> {
const io = input.io ?? noopIo();
const queryExecutor = createKtxCliIngestQueryExecutor(input.project);
const semanticLayerCompute = await createManagedPythonSemanticLayerComputePort({
cliVersion: input.cliVersion,
installPolicy: 'auto',
io,
});
const sqlAnalysis = createManagedDaemonSqlAnalysisPort({
cliVersion: input.cliVersion,
projectDir: input.projectDir,
installPolicy: 'auto',
io,
});
const contextTools = createLocalProjectMcpContextPorts(input.project, {
semanticLayerCompute,
queryExecutor,
sqlAnalysis,
localScan: {
createConnector: async (connectionId) => createKtxCliScanConnector(input.project, connectionId),
},
localIngest: {
semanticLayerCompute,
queryExecutor,
},
});
let memoryCapture: ReturnType<typeof createLocalProjectMemoryCapture> | undefined;
try {
memoryCapture = createLocalProjectMemoryCapture(input.project, { semanticLayerCompute, queryExecutor });
} catch (error) {
input.io?.stderr.write(`KTX MCP memory_capture disabled: ${error instanceof Error ? error.message : String(error)}\n`);
}
return () =>
createDefaultKtxMcpServer({
name: 'ktx',
version: input.cliVersion,
userContext: { userId: 'local' },
contextTools,
memoryCapture,
});
}
function listenerPort(server: Server, fallback: number): number {
const address = server.address();
return typeof address === 'object' && address ? address.port : fallback;
}
function transportAllowedHosts(config: McpSecurityConfig, server: Server): string[] {
const port = listenerPort(server, config.port);
const hosts = new Set<string>(config.allowedHosts);
for (const host of config.allowedHosts) {
hosts.add(`${host}:${port}`);
if (config.port !== 0 && config.port !== port) {
hosts.add(`${host}:${config.port}`);
}
}
return [...hosts];
}
export async function runKtxMcpHttpServer(options: RunKtxMcpHttpServerOptions): Promise<KtxMcpHttpServerHandle> {
const config = buildMcpSecurityConfig(options);
const project =
options.createMcpServer === undefined
? await (options.loadProject ?? loadKtxProject)({ projectDir: options.projectDir })
: undefined;
const createMcpServer =
options.createMcpServer ??
(await defaultMcpServerFactory({
project: project!,
projectDir: options.projectDir,
cliVersion: options.cliVersion ?? '0.0.0-private',
io: options.io,
}));
const sessions = new Map<string, StreamableHTTPServerTransport>();
async function newTransport(): Promise<StreamableHTTPServerTransport> {
let transport: StreamableHTTPServerTransport;
transport = new StreamableHTTPServerTransport({
sessionIdGenerator: () => randomUUID(),
onsessioninitialized: (sessionId) => {
sessions.set(sessionId, transport);
},
onsessionclosed: (sessionId) => {
sessions.delete(sessionId);
},
allowedHosts: transportAllowedHosts(config, server),
allowedOrigins: config.allowedOrigins,
enableDnsRebindingProtection: true,
});
transport.onclose = () => {
if (transport.sessionId) {
sessions.delete(transport.sessionId);
}
};
await createMcpServer().connect(transport);
return transport;
}
const server = createServer(async (req, res) => {
const path = requestPath(req);
const auth = isMcpRequestAuthorized({ path, headers: req.headers }, config);
if (!auth.ok) {
writeText(res, auth.status, auth.message);
return;
}
if (path === '/health' && req.method === 'GET') {
const port = listenerPort(server, config.port);
writeJson(res, 200, { status: 'ok', projectDir: options.projectDir, port });
return;
}
if (path !== '/mcp' || !['POST', 'GET', 'DELETE'].includes(req.method ?? '')) {
writeText(res, 404, 'Not found');
return;
}
const sessionId = req.headers['mcp-session-id'];
const normalizedSessionId = Array.isArray(sessionId) ? sessionId[0] : sessionId;
if (req.method === 'POST') {
let body: unknown;
try {
body = await readJsonBody(req);
} catch (error) {
writeText(res, 400, `Invalid JSON body: ${error instanceof Error ? error.message : String(error)}`);
return;
}
const existing = normalizedSessionId ? sessions.get(normalizedSessionId) : undefined;
if (existing) {
await existing.handleRequest(req, res, body);
return;
}
if (normalizedSessionId) {
writeText(res, 404, `Unknown MCP session: ${normalizedSessionId}`);
return;
}
if (!isInitializeRequest(body)) {
writeText(res, 400, 'MCP initialize request is required before session traffic.');
return;
}
await (await newTransport()).handleRequest(req, res, body);
return;
}
if (!normalizedSessionId || !sessions.has(normalizedSessionId)) {
writeText(res, 404, normalizedSessionId ? `Unknown MCP session: ${normalizedSessionId}` : 'Missing MCP session id.');
return;
}
await sessions.get(normalizedSessionId)!.handleRequest(req, res);
});
await new Promise<void>((resolve, reject) => {
server.once('error', reject);
server.listen(config.port, config.host, () => {
server.off('error', reject);
resolve();
});
});
return {
server,
async close() {
for (const transport of sessions.values()) {
await transport.close();
}
await new Promise<void>((resolve, reject) => {
server.close((error) => (error ? reject(error) : resolve()));
});
},
};
}

View file

@ -12,11 +12,12 @@ describe('renderKtxCommandTree', () => {
.filter((line) => /^ {2}[├└]── \S/.test(line))
.map((line) => line.replace(/^ {2}[├└]── /, '').trim().split(' ')[0]);
for (const expected of ['setup', 'connection', 'ingest', 'sl', 'dev']) {
for (const expected of ['setup', 'connection', 'ingest', 'sl', 'mcp', 'dev']) {
expect(topLevel).toContain(expected);
}
expect(output).toContain('│ └── test [connectionId]');
expect(output).toContain('│ ├── status Show KTX MCP daemon status');
expect(output).not.toContain('│ ├── add');
expect(output).not.toContain('│ ├── remove');
expect(output).not.toContain('│ ├── map');
@ -24,7 +25,6 @@ describe('renderKtxCommandTree', () => {
expect(output).not.toContain('│ ├── metabase');
expect(output).not.toContain('│ ├── notion');
expect(output).not.toContain('scan <connectionId>');
expect(output).not.toContain('│ ├── status');
expect(output).not.toContain('│ ├── replay');
expect(output).not.toContain('│ └── replay');
expect(output).not.toContain('│ ├── run');

View file

@ -37,23 +37,28 @@ describe('setup agents', () => {
await rm(tempDir, { recursive: true, force: true });
});
it('plans project-scoped CLI files for every target', () => {
it('plans project-scoped CLI and research files for every target', () => {
expect(plannedKtxAgentFiles({ projectDir: tempDir, target: 'claude-code', scope: 'project', mode: 'cli' })).toEqual([
{ kind: 'file', path: join(tempDir, '.claude/skills/ktx/SKILL.md'), role: 'skill' },
{ kind: 'file', path: join(tempDir, '.claude/skills/ktx-research/SKILL.md'), role: 'research-skill' },
{ kind: 'file', path: join(tempDir, '.claude/rules/ktx.md'), role: 'rule' },
]);
expect(plannedKtxAgentFiles({ projectDir: tempDir, target: 'codex', scope: 'project', mode: 'cli' })).toEqual([
{ kind: 'file', path: join(tempDir, '.agents/skills/ktx/SKILL.md'), role: 'skill' },
{ kind: 'file', path: join(tempDir, '.agents/skills/ktx-research/SKILL.md'), role: 'research-skill' },
{ kind: 'file', path: join(tempDir, '.codex/instructions/ktx.md'), role: 'rule' },
]);
expect(plannedKtxAgentFiles({ projectDir: tempDir, target: 'cursor', scope: 'project', mode: 'cli' })).toEqual([
{ kind: 'file', path: join(tempDir, '.cursor/rules/ktx.mdc') },
{ kind: 'file', path: join(tempDir, '.cursor/rules/ktx-research.mdc'), role: 'research-skill' },
]);
expect(plannedKtxAgentFiles({ projectDir: tempDir, target: 'opencode', scope: 'project', mode: 'cli' })).toEqual([
{ kind: 'file', path: join(tempDir, '.opencode/commands/ktx.md') },
{ kind: 'file', path: join(tempDir, '.opencode/commands/ktx-research.md'), role: 'research-skill' },
]);
expect(plannedKtxAgentFiles({ projectDir: tempDir, target: 'universal', scope: 'project', mode: 'cli' })).toEqual([
{ kind: 'file', path: join(tempDir, '.agents/skills/ktx/SKILL.md') },
{ kind: 'file', path: join(tempDir, '.agents/skills/ktx-research/SKILL.md'), role: 'research-skill' },
]);
});
@ -97,6 +102,31 @@ describe('setup agents', () => {
expect(io.stderr()).toBe('');
});
it('installs the research skill from the runtime asset', async () => {
const io = makeIo();
await expect(
runKtxSetupAgentsStep(
{
projectDir: tempDir,
inputMode: 'disabled',
yes: true,
agents: true,
target: 'universal',
scope: 'project',
mode: 'cli',
skipAgents: false,
},
io.io,
),
).resolves.toMatchObject({ status: 'ready' });
const researchSkill = await readFile(join(tempDir, '.agents/skills/ktx-research/SKILL.md'), 'utf-8');
expect(researchSkill).toContain('name: ktx-research');
expect(researchSkill).toContain('Always run `discover_data` before writing SQL.');
expect(researchSkill).toContain('Treat a `dictionary_search` miss as non-authoritative.');
});
it('writes PATH-independent launcher commands for skills', async () => {
const io = makeIo();
@ -123,6 +153,178 @@ describe('setup agents', () => {
expect(skill).not.toContain('sql execute');
});
it('writes Claude Code project MCP config and tracks the json key', async () => {
const io = makeIo();
await expect(
runKtxSetupAgentsStep(
{
projectDir: tempDir,
inputMode: 'disabled',
yes: true,
agents: true,
target: 'claude-code',
scope: 'project',
mode: 'cli',
skipAgents: false,
},
io.io,
),
).resolves.toMatchObject({ status: 'ready' });
const mcpJson = JSON.parse(await readFile(join(tempDir, '.mcp.json'), 'utf-8')) as {
mcpServers: { ktx: { type: string; url: string; headers?: Record<string, string> } };
};
expect(mcpJson.mcpServers.ktx).toEqual({ type: 'http', url: 'http://localhost:7878/mcp' });
expect(await readKtxAgentInstallManifest(tempDir)).toMatchObject({
entries: expect.arrayContaining([{ kind: 'json-key', path: join(tempDir, '.mcp.json'), jsonPath: ['mcpServers', 'ktx'] }]),
});
expect(io.stdout()).toContain('Run `ktx mcp start` to enable the configured KTX MCP server.');
});
it('writes Cursor project MCP config', async () => {
const io = makeIo();
await runKtxSetupAgentsStep(
{
projectDir: tempDir,
inputMode: 'disabled',
yes: true,
agents: true,
target: 'cursor',
scope: 'project',
mode: 'cli',
skipAgents: false,
},
io.io,
);
const cursorJson = JSON.parse(await readFile(join(tempDir, '.cursor/mcp.json'), 'utf-8')) as {
mcpServers: { ktx: { url: string; headers?: Record<string, string> } };
};
expect(cursorJson.mcpServers.ktx).toEqual({ url: 'http://localhost:7878/mcp' });
});
it('prints Codex and opencode snippets without mutating printed-only config files', async () => {
const codexIo = makeIo();
await runKtxSetupAgentsStep(
{
projectDir: tempDir,
inputMode: 'disabled',
yes: true,
agents: true,
target: 'codex',
scope: 'project',
mode: 'cli',
skipAgents: false,
},
codexIo.io,
);
expect(codexIo.stdout()).toContain('[mcp_servers.ktx]');
expect(codexIo.stdout()).toContain('url = "http://localhost:7878/mcp"');
const opencodeIo = makeIo();
await runKtxSetupAgentsStep(
{
projectDir: tempDir,
inputMode: 'disabled',
yes: true,
agents: true,
target: 'opencode',
scope: 'project',
mode: 'cli',
skipAgents: false,
},
opencodeIo.io,
);
expect(opencodeIo.stdout()).toContain('"mcp"');
expect(opencodeIo.stdout()).toContain('"type": "remote"');
await expect(readFile(join(tempDir, 'opencode.json'), 'utf-8')).rejects.toThrow();
});
it('uses MCP daemon state for port and token metadata without rendering literal tokens', async () => {
await mkdir(join(tempDir, '.ktx'), { recursive: true });
await writeFile(
join(tempDir, '.ktx/mcp.json'),
`${JSON.stringify(
{
schemaVersion: 1,
pid: 999999,
host: '127.0.0.1',
port: 8787,
tokenAuth: true,
projectDir: tempDir,
startedAt: '2026-05-14T00:00:00.000Z',
logPath: join(tempDir, '.ktx/logs/mcp.log'),
},
null,
2,
)}\n`,
'utf-8',
);
const io = makeIo();
const previousToken = process.env.KTX_MCP_TOKEN;
process.env.KTX_MCP_TOKEN = 'secret-token';
try {
await runKtxSetupAgentsStep(
{
projectDir: tempDir,
inputMode: 'disabled',
yes: true,
agents: true,
target: 'claude-code',
scope: 'project',
mode: 'cli',
skipAgents: false,
},
io.io,
);
const rendered = JSON.stringify(JSON.parse(await readFile(join(tempDir, '.mcp.json'), 'utf-8')));
expect(rendered).toContain('http://127.0.0.1:8787/mcp');
expect(rendered).toContain('Bearer ${KTX_MCP_TOKEN}');
expect(rendered).not.toContain('secret-token');
expect(io.stdout()).toContain('Run `ktx mcp start` to enable the configured KTX MCP server.');
} finally {
if (previousToken === undefined) {
delete process.env.KTX_MCP_TOKEN;
} else {
process.env.KTX_MCP_TOKEN = previousToken;
}
}
});
it('writes Claude Code local MCP config under the project key in ~/.claude.json', async () => {
const home = await mkdtemp(join(tmpdir(), 'ktx-setup-agents-home-'));
const previousHome = process.env.HOME;
process.env.HOME = home;
try {
const io = makeIo();
await runKtxSetupAgentsStep(
{
projectDir: tempDir,
inputMode: 'disabled',
yes: true,
agents: true,
target: 'claude-code',
scope: 'local',
mode: 'cli',
skipAgents: false,
},
io.io,
);
const config = JSON.parse(await readFile(join(home, '.claude.json'), 'utf-8')) as {
projects: Record<string, { mcpServers: { ktx: { type: string; url: string } } }>;
};
expect(config.projects[tempDir].mcpServers.ktx).toEqual({ type: 'http', url: 'http://localhost:7878/mcp' });
} finally {
process.env.HOME = previousHome;
await rm(home, { recursive: true, force: true });
}
});
it('removes only manifest-listed files', async () => {
const io = makeIo();
await runKtxSetupAgentsStep(

View file

@ -1,3 +1,4 @@
import { existsSync } from 'node:fs';
import { mkdir, readFile, rm, writeFile } from 'node:fs/promises';
import { dirname, join, relative, resolve } from 'node:path';
import { fileURLToPath } from 'node:url';
@ -12,9 +13,10 @@ import {
createKtxSetupPromptAdapter,
type KtxSetupPromptOption,
} from './setup-prompts.js';
import { readKtxMcpDaemonStatus } from './managed-mcp-daemon.js';
export type KtxAgentTarget = 'claude-code' | 'codex' | 'cursor' | 'opencode' | 'universal';
export type KtxAgentScope = 'project' | 'global';
export type KtxAgentScope = 'project' | 'global' | 'local';
export type KtxAgentInstallMode = 'cli';
export interface KtxSetupAgentsArgs {
@ -45,18 +47,179 @@ export interface KtxAgentInstallManifest {
installedAt: string;
installs: Array<{ target: KtxAgentTarget; scope: KtxAgentScope; mode: KtxAgentInstallMode }>;
entries: Array<
| { kind: 'file'; path: string; role?: 'skill' | 'rule' }
| { kind: 'file'; path: string; role?: 'skill' | 'rule' | 'research-skill' }
| { kind: 'json-key'; path: string; jsonPath: string[] }
>;
}
type InstallEntry = KtxAgentInstallManifest['entries'][number];
interface KtxMcpEndpointInfo {
url: string;
tokenAuth: boolean;
running: boolean;
}
interface KtxMcpClientInstallResult {
entries: InstallEntry[];
snippets: string[];
notices: string[];
}
interface KtxCliLauncher {
command: string;
args: string[];
}
async function readJsonObject(path: string): Promise<Record<string, unknown>> {
if (!existsSync(path)) return {};
const parsed = JSON.parse(await readFile(path, 'utf-8')) as unknown;
if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) {
throw new Error(`Expected JSON object in ${path}`);
}
return parsed as Record<string, unknown>;
}
function objectAtPath(root: Record<string, unknown>, jsonPath: string[]): Record<string, unknown> {
let cursor = root;
for (const segment of jsonPath) {
const current = cursor[segment];
if (!current || typeof current !== 'object' || Array.isArray(current)) {
cursor[segment] = {};
}
cursor = cursor[segment] as Record<string, unknown>;
}
return cursor;
}
async function writeJsonKey(path: string, jsonPath: string[], value: unknown): Promise<void> {
const root = await readJsonObject(path);
const parent = objectAtPath(root, jsonPath.slice(0, -1));
parent[jsonPath.at(-1) as string] = value;
await mkdir(dirname(path), { recursive: true });
await writeFile(path, `${JSON.stringify(root, null, 2)}\n`, 'utf-8');
}
async function resolveMcpEndpoint(projectDir: string): Promise<KtxMcpEndpointInfo> {
const status = await readKtxMcpDaemonStatus({ projectDir }).catch(() => null);
if (status?.kind === 'running') {
return {
url: status.url,
tokenAuth: status.state.tokenAuth,
running: true,
};
}
if (status?.kind === 'stale' && status.state) {
return {
url: `http://${status.state.host}:${status.state.port}/mcp`,
tokenAuth: status.state.tokenAuth || Boolean(process.env.KTX_MCP_TOKEN),
running: false,
};
}
return {
url: 'http://localhost:7878/mcp',
tokenAuth: Boolean(process.env.KTX_MCP_TOKEN),
running: false,
};
}
function tokenHeaders(endpoint: KtxMcpEndpointInfo): Record<string, string> | undefined {
return endpoint.tokenAuth ? { Authorization: 'Bearer ${KTX_MCP_TOKEN}' } : undefined;
}
function claudeMcpEntry(endpoint: KtxMcpEndpointInfo): Record<string, unknown> {
return {
type: 'http',
url: endpoint.url,
...(tokenHeaders(endpoint) ? { headers: tokenHeaders(endpoint) } : {}),
};
}
function cursorMcpEntry(endpoint: KtxMcpEndpointInfo): Record<string, unknown> {
return {
url: endpoint.url,
...(tokenHeaders(endpoint) ? { headers: tokenHeaders(endpoint) } : {}),
};
}
function codexSnippet(endpoint: KtxMcpEndpointInfo): string {
if (endpoint.tokenAuth) {
return [
'Codex MCP config does not currently document HTTP headers.',
'Run KTX on loopback without token auth for Codex, or configure headers after Codex documents support.',
].join('\n');
}
return [`[mcp_servers.ktx]`, `url = "${endpoint.url}"`].join('\n');
}
function opencodeSnippet(endpoint: KtxMcpEndpointInfo): string {
return JSON.stringify(
{
mcp: {
ktx: {
type: 'remote',
url: endpoint.url,
enabled: true,
...(tokenHeaders(endpoint) ? { headers: tokenHeaders(endpoint) } : {}),
},
},
},
null,
2,
);
}
function claudeConfigPath(projectDir: string, scope: KtxAgentScope): { path: string; jsonPath: string[] } {
const home = process.env.HOME ?? '';
if (scope === 'global') {
return { path: join(home, '.claude.json'), jsonPath: ['mcpServers', 'ktx'] };
}
if (scope === 'local') {
return { path: join(home, '.claude.json'), jsonPath: ['projects', resolve(projectDir), 'mcpServers', 'ktx'] };
}
return { path: join(resolve(projectDir), '.mcp.json'), jsonPath: ['mcpServers', 'ktx'] };
}
function cursorConfigPath(projectDir: string, scope: KtxAgentScope): { path: string; jsonPath: string[] } {
const home = process.env.HOME ?? '';
return {
path: scope === 'global' ? join(home, '.cursor/mcp.json') : join(resolve(projectDir), '.cursor/mcp.json'),
jsonPath: ['mcpServers', 'ktx'],
};
}
async function installMcpClientConfig(input: {
projectDir: string;
target: KtxAgentTarget;
scope: KtxAgentScope;
}): Promise<KtxMcpClientInstallResult> {
const endpoint = await resolveMcpEndpoint(input.projectDir);
const entries: InstallEntry[] = [];
const snippets: string[] = [];
const notices: string[] = [];
if (!endpoint.running) {
notices.push('Run `ktx mcp start` to enable the configured KTX MCP server.');
}
if (input.target === 'claude-code') {
const config = claudeConfigPath(input.projectDir, input.scope);
await writeJsonKey(config.path, config.jsonPath, claudeMcpEntry(endpoint));
entries.push({ kind: 'json-key', path: config.path, jsonPath: config.jsonPath });
} else if (input.target === 'cursor') {
const config = cursorConfigPath(input.projectDir, input.scope);
await writeJsonKey(config.path, config.jsonPath, cursorMcpEntry(endpoint));
entries.push({ kind: 'json-key', path: config.path, jsonPath: config.jsonPath });
} else if (input.target === 'codex') {
snippets.push(`Codex MCP snippet for ~/.codex/config.toml:\n${codexSnippet(endpoint)}`);
} else if (input.target === 'opencode') {
const path = input.scope === 'global' ? '~/.config/opencode/opencode.json' : relative(input.projectDir, join(input.projectDir, 'opencode.json'));
snippets.push(`opencode MCP snippet for ${path}:\n${opencodeSnippet(endpoint)}`);
}
return { entries, snippets, notices };
}
export function agentInstallManifestPath(projectDir: string): string {
return join(resolve(projectDir), '.ktx/agents/install-manifest.json');
}
@ -72,6 +235,7 @@ export function plannedKtxAgentFiles(input: {
const home = process.env.HOME ?? '';
return [
{ kind: 'file', path: join(home, '.claude/skills/ktx/SKILL.md'), role: 'skill' as const },
{ kind: 'file', path: join(home, '.claude/skills/ktx-research/SKILL.md'), role: 'research-skill' as const },
{ kind: 'file', path: join(home, '.claude/rules/ktx.md'), role: 'rule' as const },
];
}
@ -79,25 +243,44 @@ export function plannedKtxAgentFiles(input: {
const codexHome = process.env.CODEX_HOME ?? join(process.env.HOME ?? '', '.codex');
return [
{ kind: 'file', path: join(codexHome, 'skills/ktx/SKILL.md'), role: 'skill' as const },
{ kind: 'file', path: join(codexHome, 'skills/ktx-research/SKILL.md'), role: 'research-skill' as const },
{ kind: 'file', path: join(codexHome, 'instructions/ktx.md'), role: 'rule' as const },
];
}
if (input.target === 'cursor' || input.target === 'opencode') {
return [];
}
throw new Error(`Global ${input.target} installation is not supported; omit --global.`);
}
const root = resolve(input.projectDir);
const cliEntries: Partial<Record<KtxAgentTarget, InstallEntry>> = {
'claude-code': { kind: 'file', path: join(root, '.claude/skills/ktx/SKILL.md'), role: 'skill' },
codex: { kind: 'file', path: join(root, '.agents/skills/ktx/SKILL.md'), role: 'skill' },
cursor: { kind: 'file', path: join(root, '.cursor/rules/ktx.mdc') },
opencode: { kind: 'file', path: join(root, '.opencode/commands/ktx.md') },
universal: { kind: 'file', path: join(root, '.agents/skills/ktx/SKILL.md') },
const cliEntries: Partial<Record<KtxAgentTarget, InstallEntry[]>> = {
'claude-code': [
{ kind: 'file', path: join(root, '.claude/skills/ktx/SKILL.md'), role: 'skill' },
{ kind: 'file', path: join(root, '.claude/skills/ktx-research/SKILL.md'), role: 'research-skill' },
],
codex: [
{ kind: 'file', path: join(root, '.agents/skills/ktx/SKILL.md'), role: 'skill' },
{ kind: 'file', path: join(root, '.agents/skills/ktx-research/SKILL.md'), role: 'research-skill' },
],
cursor: [
{ kind: 'file', path: join(root, '.cursor/rules/ktx.mdc') },
{ kind: 'file', path: join(root, '.cursor/rules/ktx-research.mdc'), role: 'research-skill' },
],
opencode: [
{ kind: 'file', path: join(root, '.opencode/commands/ktx.md') },
{ kind: 'file', path: join(root, '.opencode/commands/ktx-research.md'), role: 'research-skill' },
],
universal: [
{ kind: 'file', path: join(root, '.agents/skills/ktx/SKILL.md') },
{ kind: 'file', path: join(root, '.agents/skills/ktx-research/SKILL.md'), role: 'research-skill' },
],
};
const ruleEntries: Partial<Record<KtxAgentTarget, InstallEntry>> = {
'claude-code': { kind: 'file', path: join(root, '.claude/rules/ktx.md'), role: 'rule' },
codex: { kind: 'file', path: join(root, '.codex/instructions/ktx.md'), role: 'rule' },
};
return [cliEntries[input.target], ruleEntries[input.target]].filter(
return [...(cliEntries[input.target] ?? []), ruleEntries[input.target]].filter(
(entry): entry is InstallEntry => entry !== undefined,
);
}
@ -109,6 +292,12 @@ function ktxCliLauncher(): KtxCliLauncher {
};
}
async function readResearchSkillContent(): Promise<string> {
const path = fileURLToPath(new URL('./skills/research/SKILL.md', import.meta.url));
const content = await readFile(path, 'utf-8');
return content.endsWith('\n') ? content : `${content}\n`;
}
function shellQuote(value: string): string {
if (/^[A-Za-z0-9_/:=.,@%+-]+$/.test(value)) {
return value;
@ -283,16 +472,22 @@ export function formatInstallSummary(
projectDir: string,
): string {
const entriesByTarget = new Map<KtxAgentTarget, InstallEntry[]>();
let idx = 0;
for (const install of installs) {
const planned = plannedKtxAgentFiles({ projectDir, ...install });
entriesByTarget.set(install.target, entries.slice(idx, idx + planned.length));
idx += planned.length;
const plannedFilePaths = new Set(
plannedKtxAgentFiles({ projectDir, ...install })
.filter((entry) => entry.kind === 'file')
.map((entry) => entry.path),
);
entriesByTarget.set(
install.target,
entries.filter((entry) => entry.kind === 'file' && plannedFilePaths.has(entry.path)),
);
}
const fileHints: Record<string, string> = {
skill: 'teaches your agent which KTX commands to run',
rule: 'tells your agent when to use KTX',
'research-skill': 'teaches your agent the KTX MCP research workflow',
};
const lines: string[] = [];
@ -304,7 +499,7 @@ export function formatInstallSummary(
install.scope === 'global' ? entry.path : relative(projectDir, entry.path);
if (entry.kind === 'file') {
const isRule = entry.role === 'rule' || fileEntryLabels[install.target] === 'Rule installed';
const label = isRule ? 'Rule installed' : fileEntryLabels[install.target];
const label = entry.role === 'research-skill' ? 'Research skill installed' : isRule ? 'Rule installed' : fileEntryLabels[install.target];
const hint = fileHints[isRule ? 'rule' : (entry.role ?? 'skill')] ?? '';
lines.push(` + ${label}${hint}`);
lines.push(` ${displayPath}`);
@ -327,6 +522,8 @@ async function installTarget(input: {
const content =
entry.role === 'rule'
? ruleInstructionContent({ projectDir: input.projectDir })
: entry.role === 'research-skill'
? await readResearchSkillContent()
: cliInstructionContent({ projectDir: input.projectDir, launcher });
await mkdir(dirname(entry.path), { recursive: true });
await writeFile(entry.path, content, 'utf-8');
@ -391,11 +588,25 @@ export async function runKtxSetupAgentsStep(
const installs = targets.map((target) => ({ target, scope: args.scope, mode }));
const entries: InstallEntry[] = [];
const snippets: string[] = [];
const notices = new Set<string>();
try {
for (const install of installs) entries.push(...(await installTarget({ projectDir: args.projectDir, ...install })));
for (const install of installs) {
entries.push(...(await installTarget({ projectDir: args.projectDir, ...install })));
const mcpResult = await installMcpClientConfig({ projectDir: args.projectDir, target: install.target, scope: install.scope });
entries.push(...mcpResult.entries);
for (const snippet of mcpResult.snippets) snippets.push(snippet);
for (const notice of mcpResult.notices) notices.add(notice);
}
await writeManifest(args.projectDir, mergeManifest(args.projectDir, await readKtxAgentInstallManifest(args.projectDir), installs, entries));
await markAgentsComplete(args.projectDir);
io.stdout.write(`\nAgent integration complete\n\n${formatInstallSummary(installs, entries, args.projectDir)}\n`);
for (const snippet of snippets) {
io.stdout.write(`\n${snippet}\n`);
}
for (const notice of notices) {
io.stdout.write(`\n${notice}\n`);
}
return { status: 'ready', projectDir: args.projectDir, installs };
} catch (error) {
io.stderr.write(`${error instanceof Error ? error.message : String(error)}\n`);

View file

@ -0,0 +1,49 @@
---
name: ktx-research
description: Use when answering a question that needs data from a KTX-connected database - investigating, analyzing, "how many", "show me", "what's the breakdown of", finding records by value, exploring tables, comparing periods, or any data-investigation request. Triggers even when the user does not say "research"; if the answer requires querying a configured KTX connection, this skill applies.
---
# KTX Research Workflow
You have access to KTX MCP tools for investigating data. Follow this workflow.
<workflow>
1. **Discover** - call `discover_data` first to see what exists across wiki, semantic-layer sources, and raw tables. Returns refs only.
2. **Inspect top hits in parallel** - for each promising ref:
- `kind: 'wiki'` -> `wiki_read`
- `kind: 'sl_source'`, `kind: 'sl_measure'`, or `kind: 'sl_dimension'` -> `sl_read_source`
- `kind: 'table'` or `kind: 'column'` -> `entity_details`
3. **Resolve literals** - if the user named a value such as "Acme Corp" or "status=shipped", call `dictionary_search` to find which column holds it.
4. **Query** -
- Prefer `sl_query` when the semantic layer covers the question.
- Use `sql_execution` only for questions the semantic layer does not cover.
5. **Capture learnings** - at the end of the turn, call `memory_capture` so future turns benefit. Skip when the answer carries no durable knowledge.
</workflow>
<rules>
- Always run `discover_data` before writing SQL. Do not guess table names.
- Prefer the semantic layer over raw SQL when both can answer the question; measures are the source of truth.
- Read entity details before writing SQL against an unfamiliar table. Do not assume column names.
- Treat `sql_execution` as read-only. Writes are rejected by the server.
- Validate value mentions with `dictionary_search` instead of guessing case or spelling. Treat a `dictionary_search` miss as non-authoritative. The index is built from profile-sampled values, so a missing value may simply have been outside the sample. Follow up with `sql_execution` against the most plausible columns before concluding the value is absent.
</rules>
<examples>
**Input:** "How many orders did Acme Corp place last month?"
**Workflow:**
1. `dictionary_search({ values: ["Acme Corp"] })` finds `customers.name`.
2. `discover_data({ query: "orders customer monthly" })` finds an orders semantic-layer source.
3. `sl_read_source({ connectionId: "warehouse", sourceName: "orders_facts" })` confirms the source grain, measures, and dimensions.
4. `sl_query({ connectionId: "warehouse", measures: ["order_count"], filters: ["customer_name = 'Acme Corp'"] })` answers through the semantic layer.
5. `memory_capture({ userMessage, assistantMessage })` captures the durable finding.
---
**Input:** "What columns does the events table have?"
**Workflow:**
1. `discover_data({ query: "events table" })` returns a `table` ref.
2. `entity_details({ connectionId: "warehouse", entities: [{ table: "analytics.events" }] })` returns columns, types, and foreign keys.
3. Answer directly. No query is needed.
</examples>

View file

@ -8,16 +8,16 @@ Before writing a wiki page or SL source on any topic:
Before emitting any `schema.table` or `schema.table.column` into a wiki body,
SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`:
2. `entity_details({connectionName, targets: [{display: "<identifier>"}]})` -
2. `entity_details({connectionId, targets: [{display: "<identifier>"}]})` -
confirm the identifier resolves; inspect native types, FK/PK, and
sampleValues.
3. For literal values from the source, such as status codes or plan tiers,
check whether they appear in `entity_details` sampleValues for the relevant
column. If sampleValues is short or the sample may have missed real values,
run a `sql_execution` probe with the same warehouse connection name:
`sql_execution({connectionName, sql: "SELECT DISTINCT <col> FROM <ref> LIMIT 50"})`.
run a `sql_execution` probe with the same warehouse connection id:
`sql_execution({connectionId, sql: "SELECT DISTINCT <col> FROM <ref> LIMIT 50"})`.
4. If the candidate identifier still does not resolve, do one of:
- Use `sql_execution({connectionName, sql: "SELECT 1 FROM <ref> LIMIT 0"})`.
- Use `sql_execution({connectionId, sql: "SELECT 1 FROM <ref> LIMIT 0"})`.
If it errors, the identifier is fictional.
- Wrap the identifier in `[unverified - from <rawPath>]` in the wiki body,
citing the exact raw path that mentioned it.

View file

@ -41,16 +41,16 @@ Before writing a wiki page or SL source on any topic:
Before emitting any `schema.table` or `schema.table.column` into a wiki body,
SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`:
2. `entity_details({connectionName, targets: [{display: "<identifier>"}]})` -
2. `entity_details({connectionId, targets: [{display: "<identifier>"}]})` -
confirm the identifier resolves; inspect native types, FK/PK, and
sampleValues.
3. For literal values from the source, such as status codes or plan tiers,
check whether they appear in `entity_details` sampleValues for the relevant
column. If sampleValues is short or the sample may have missed real values,
run a `sql_execution` probe with the same warehouse connection name:
`sql_execution({connectionName, sql: "SELECT DISTINCT <col> FROM <ref> LIMIT 50"})`.
run a `sql_execution` probe with the same warehouse connection id:
`sql_execution({connectionId, sql: "SELECT DISTINCT <col> FROM <ref> LIMIT 50"})`.
4. If the candidate identifier still does not resolve, do one of:
- Use `sql_execution({connectionName, sql: "SELECT 1 FROM <ref> LIMIT 0"})`.
- Use `sql_execution({connectionId, sql: "SELECT 1 FROM <ref> LIMIT 0"})`.
If it errors, the identifier is fictional.
- Wrap the identifier in `[unverified - from <rawPath>]` in the wiki body,
citing the exact raw path that mentioned it.

View file

@ -31,16 +31,16 @@ Before writing a wiki page or SL source on any topic:
Before emitting any `schema.table` or `schema.table.column` into a wiki body,
SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`:
2. `entity_details({connectionName, targets: [{display: "<identifier>"}]})` -
2. `entity_details({connectionId, targets: [{display: "<identifier>"}]})` -
confirm the identifier resolves; inspect native types, FK/PK, and
sampleValues.
3. For literal values from the source, such as status codes or plan tiers,
check whether they appear in `entity_details` sampleValues for the relevant
column. If sampleValues is short or the sample may have missed real values,
run a `sql_execution` probe with the same warehouse connection name:
`sql_execution({connectionName, sql: "SELECT DISTINCT <col> FROM <ref> LIMIT 50"})`.
run a `sql_execution` probe with the same warehouse connection id:
`sql_execution({connectionId, sql: "SELECT DISTINCT <col> FROM <ref> LIMIT 50"})`.
4. If the candidate identifier still does not resolve, do one of:
- Use `sql_execution({connectionName, sql: "SELECT 1 FROM <ref> LIMIT 0"})`.
- Use `sql_execution({connectionId, sql: "SELECT 1 FROM <ref> LIMIT 0"})`.
If it errors, the identifier is fictional.
- Wrap the identifier in `[unverified - from <rawPath>]` in the wiki body,
citing the exact raw path that mentioned it.

View file

@ -27,16 +27,16 @@ Before writing a wiki page or SL source on any topic:
Before emitting any `schema.table` or `schema.table.column` into a wiki body,
SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`:
2. `entity_details({connectionName, targets: [{display: "<identifier>"}]})` -
2. `entity_details({connectionId, targets: [{display: "<identifier>"}]})` -
confirm the identifier resolves; inspect native types, FK/PK, and
sampleValues.
3. For literal values from the source, such as status codes or plan tiers,
check whether they appear in `entity_details` sampleValues for the relevant
column. If sampleValues is short or the sample may have missed real values,
run a `sql_execution` probe with the same warehouse connection name:
`sql_execution({connectionName, sql: "SELECT DISTINCT <col> FROM <ref> LIMIT 50"})`.
run a `sql_execution` probe with the same warehouse connection id:
`sql_execution({connectionId, sql: "SELECT DISTINCT <col> FROM <ref> LIMIT 50"})`.
4. If the candidate identifier still does not resolve, do one of:
- Use `sql_execution({connectionName, sql: "SELECT 1 FROM <ref> LIMIT 0"})`.
- Use `sql_execution({connectionId, sql: "SELECT 1 FROM <ref> LIMIT 0"})`.
If it errors, the identifier is fictional.
- Wrap the identifier in `[unverified - from <rawPath>]` in the wiki body,
citing the exact raw path that mentioned it.

View file

@ -37,16 +37,16 @@ Before writing a wiki page or SL source on any topic:
Before emitting any `schema.table` or `schema.table.column` into a wiki body,
SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`:
2. `entity_details({connectionName, targets: [{display: "<identifier>"}]})` -
2. `entity_details({connectionId, targets: [{display: "<identifier>"}]})` -
confirm the identifier resolves; inspect native types, FK/PK, and
sampleValues.
3. For literal values from the source, such as status codes or plan tiers,
check whether they appear in `entity_details` sampleValues for the relevant
column. If sampleValues is short or the sample may have missed real values,
run a `sql_execution` probe with the same warehouse connection name:
`sql_execution({connectionName, sql: "SELECT DISTINCT <col> FROM <ref> LIMIT 50"})`.
run a `sql_execution` probe with the same warehouse connection id:
`sql_execution({connectionId, sql: "SELECT DISTINCT <col> FROM <ref> LIMIT 50"})`.
4. If the candidate identifier still does not resolve, do one of:
- Use `sql_execution({connectionName, sql: "SELECT 1 FROM <ref> LIMIT 0"})`.
- Use `sql_execution({connectionId, sql: "SELECT 1 FROM <ref> LIMIT 0"})`.
If it errors, the identifier is fictional.
- Wrap the identifier in `[unverified - from <rawPath>]` in the wiki body,
citing the exact raw path that mentioned it.

View file

@ -34,16 +34,16 @@ Before writing a wiki page or SL source on any topic:
Before emitting any `schema.table` or `schema.table.column` into a wiki body,
SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`:
2. `entity_details({connectionName, targets: [{display: "<identifier>"}]})` -
2. `entity_details({connectionId, targets: [{display: "<identifier>"}]})` -
confirm the identifier resolves; inspect native types, FK/PK, and
sampleValues.
3. For literal values from the source, such as status codes or plan tiers,
check whether they appear in `entity_details` sampleValues for the relevant
column. If sampleValues is short or the sample may have missed real values,
run a `sql_execution` probe with the same warehouse connection name:
`sql_execution({connectionName, sql: "SELECT DISTINCT <col> FROM <ref> LIMIT 50"})`.
run a `sql_execution` probe with the same warehouse connection id:
`sql_execution({connectionId, sql: "SELECT DISTINCT <col> FROM <ref> LIMIT 50"})`.
4. If the candidate identifier still does not resolve, do one of:
- Use `sql_execution({connectionName, sql: "SELECT 1 FROM <ref> LIMIT 0"})`.
- Use `sql_execution({connectionId, sql: "SELECT 1 FROM <ref> LIMIT 0"})`.
If it errors, the identifier is fictional.
- Wrap the identifier in `[unverified - from <rawPath>]` in the wiki body,
citing the exact raw path that mentioned it.

View file

@ -64,16 +64,16 @@ Before writing a wiki page or SL source on any topic:
Before emitting any `schema.table` or `schema.table.column` into a wiki body,
SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`:
2. `entity_details({connectionName, targets: [{display: "<identifier>"}]})` -
2. `entity_details({connectionId, targets: [{display: "<identifier>"}]})` -
confirm the identifier resolves; inspect native types, FK/PK, and
sampleValues.
3. For literal values from the source, such as status codes or plan tiers,
check whether they appear in `entity_details` sampleValues for the relevant
column. If sampleValues is short or the sample may have missed real values,
run a `sql_execution` probe with the same warehouse connection name:
`sql_execution({connectionName, sql: "SELECT DISTINCT <col> FROM <ref> LIMIT 50"})`.
run a `sql_execution` probe with the same warehouse connection id:
`sql_execution({connectionId, sql: "SELECT DISTINCT <col> FROM <ref> LIMIT 50"})`.
4. If the candidate identifier still does not resolve, do one of:
- Use `sql_execution({connectionName, sql: "SELECT 1 FROM <ref> LIMIT 0"})`.
- Use `sql_execution({connectionId, sql: "SELECT 1 FROM <ref> LIMIT 0"})`.
If it errors, the identifier is fictional.
- Wrap the identifier in `[unverified - from <rawPath>]` in the wiki body,
citing the exact raw path that mentioned it.
@ -85,11 +85,11 @@ SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`:
**Required flow before writing any overlay or standalone**:
1. Call `sl_discover({ query: "<tableName>" })` for each base table you're about to touch. That returns the real columns.
2. If the table isn't in the manifest, use the warehouse `connectionName`
2. If the table isn't in the manifest, use the warehouse `connectionId`
returned by `discover_data` or the target connection chosen from
`sl_discover`, then call a dialect-appropriate SQL probe with that
connection name, for example:
`sql_execution({connectionName: "warehouse", sql: "SELECT 1 FROM analytics.orders LIMIT 0"})`.
connection id, for example:
`sql_execution({connectionId: "warehouse", sql: "SELECT 1 FROM analytics.orders LIMIT 0"})`.
Replace `warehouse`, `analytics`, and `orders` with the verified connection,
schema or dataset, and table from the WorkUnit evidence.
3. Use only those names in `sql:`, `columns:`, and `grain:`. Map each `dimension_group` to ONE `{ name: <physical_col>, type: time, role: time }` entry - never one per timeframe.

View file

@ -57,16 +57,16 @@ Before writing a wiki page or SL source on any topic:
Before emitting any `schema.table` or `schema.table.column` into a wiki body,
SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`:
2. `entity_details({connectionName, targets: [{display: "<identifier>"}]})` -
2. `entity_details({connectionId, targets: [{display: "<identifier>"}]})` -
confirm the identifier resolves; inspect native types, FK/PK, and
sampleValues.
3. For literal values from the source, such as status codes or plan tiers,
check whether they appear in `entity_details` sampleValues for the relevant
column. If sampleValues is short or the sample may have missed real values,
run a `sql_execution` probe with the same warehouse connection name:
`sql_execution({connectionName, sql: "SELECT DISTINCT <col> FROM <ref> LIMIT 50"})`.
run a `sql_execution` probe with the same warehouse connection id:
`sql_execution({connectionId, sql: "SELECT DISTINCT <col> FROM <ref> LIMIT 50"})`.
4. If the candidate identifier still does not resolve, do one of:
- Use `sql_execution({connectionName, sql: "SELECT 1 FROM <ref> LIMIT 0"})`.
- Use `sql_execution({connectionId, sql: "SELECT 1 FROM <ref> LIMIT 0"})`.
If it errors, the identifier is fictional.
- Wrap the identifier in `[unverified - from <rawPath>]` in the wiki body,
citing the exact raw path that mentioned it.

View file

@ -42,16 +42,16 @@ Before writing a wiki page or SL source on any topic:
Before emitting any `schema.table` or `schema.table.column` into a wiki body,
SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`:
2. `entity_details({connectionName, targets: [{display: "<identifier>"}]})` -
2. `entity_details({connectionId, targets: [{display: "<identifier>"}]})` -
confirm the identifier resolves; inspect native types, FK/PK, and
sampleValues.
3. For literal values from the source, such as status codes or plan tiers,
check whether they appear in `entity_details` sampleValues for the relevant
column. If sampleValues is short or the sample may have missed real values,
run a `sql_execution` probe with the same warehouse connection name:
`sql_execution({connectionName, sql: "SELECT DISTINCT <col> FROM <ref> LIMIT 50"})`.
run a `sql_execution` probe with the same warehouse connection id:
`sql_execution({connectionId, sql: "SELECT DISTINCT <col> FROM <ref> LIMIT 50"})`.
4. If the candidate identifier still does not resolve, do one of:
- Use `sql_execution({connectionName, sql: "SELECT 1 FROM <ref> LIMIT 0"})`.
- Use `sql_execution({connectionId, sql: "SELECT 1 FROM <ref> LIMIT 0"})`.
If it errors, the identifier is fictional.
- Wrap the identifier in `[unverified - from <rawPath>]` in the wiki body,
citing the exact raw path that mentioned it.
@ -82,8 +82,8 @@ The `model:` field on a semantic_model is a string like `ref('table_name')`, `so
If `sl_discover` errors because no such table exists, use `discover_data` and
`entity_details` to find the warehouse target. If a SQL probe is still needed,
call `sql_execution` with the same warehouse connection name, for example:
`sql_execution({connectionName: "warehouse", sql: "SELECT 1 FROM analytics.orders LIMIT 0"})`.
call `sql_execution` with the same warehouse connection id, for example:
`sql_execution({connectionId: "warehouse", sql: "SELECT 1 FROM analytics.orders LIMIT 0"})`.
**Never invent column names** - every column in computed `columns:`, `column_overrides:`, `grain:`, and
`sql:` must be sourced from raw files, `entity_details`, or a successful SQL
probe.

View file

@ -79,16 +79,16 @@ Before writing a wiki page or SL source on any topic:
Before emitting any `schema.table` or `schema.table.column` into a wiki body,
SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`:
2. `entity_details({connectionName, targets: [{display: "<identifier>"}]})` -
2. `entity_details({connectionId, targets: [{display: "<identifier>"}]})` -
confirm the identifier resolves; inspect native types, FK/PK, and
sampleValues.
3. For literal values from the source, such as status codes or plan tiers,
check whether they appear in `entity_details` sampleValues for the relevant
column. If sampleValues is short or the sample may have missed real values,
run a `sql_execution` probe with the same warehouse connection name:
`sql_execution({connectionName, sql: "SELECT DISTINCT <col> FROM <ref> LIMIT 50"})`.
run a `sql_execution` probe with the same warehouse connection id:
`sql_execution({connectionId, sql: "SELECT DISTINCT <col> FROM <ref> LIMIT 50"})`.
4. If the candidate identifier still does not resolve, do one of:
- Use `sql_execution({connectionName, sql: "SELECT 1 FROM <ref> LIMIT 0"})`.
- Use `sql_execution({connectionId, sql: "SELECT 1 FROM <ref> LIMIT 0"})`.
If it errors, the identifier is fictional.
- Wrap the identifier in `[unverified - from <rawPath>]` in the wiki body,
citing the exact raw path that mentioned it.

View file

@ -214,16 +214,16 @@ Before writing a wiki page or SL source on any topic:
Before emitting any `schema.table` or `schema.table.column` into a wiki body,
SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`:
2. `entity_details({connectionName, targets: [{display: "<identifier>"}]})` -
2. `entity_details({connectionId, targets: [{display: "<identifier>"}]})` -
confirm the identifier resolves; inspect native types, FK/PK, and
sampleValues.
3. For literal values from the source, such as status codes or plan tiers,
check whether they appear in `entity_details` sampleValues for the relevant
column. If sampleValues is short or the sample may have missed real values,
run a `sql_execution` probe with the same warehouse connection name:
`sql_execution({connectionName, sql: "SELECT DISTINCT <col> FROM <ref> LIMIT 50"})`.
run a `sql_execution` probe with the same warehouse connection id:
`sql_execution({connectionId, sql: "SELECT DISTINCT <col> FROM <ref> LIMIT 50"})`.
4. If the candidate identifier still does not resolve, do one of:
- Use `sql_execution({connectionName, sql: "SELECT 1 FROM <ref> LIMIT 0"})`.
- Use `sql_execution({connectionId, sql: "SELECT 1 FROM <ref> LIMIT 0"})`.
If it errors, the identifier is fictional.
- Wrap the identifier in `[unverified - from <rawPath>]` in the wiki body,
citing the exact raw path that mentioned it.
@ -239,7 +239,7 @@ SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`:
3. `sl_read_source({ connectionId, sourceName })` - read the raw YAML before editing.
4. For modifications: `sl_edit_source({ connectionId, sourceName, yaml_edits: [{ oldText, newText, reason }] })` with exact-string replacements. `oldText` must match exactly and be unique in the file.
5. For new sources or full rewrites: `sl_write_source({ connectionId, sourceName, source })` with the full structured source definition.
6. For join discovery: use `sql_execution({connectionName: "warehouse", sql: "SELECT count(*) FROM public.orders o JOIN public.customers c ON c.id = o.customer_id LIMIT 20"})` with the target warehouse connection name and dialect-correct table names to verify the join key exists in both tables and assess cardinality before declaring the join.
6. For join discovery: use `sql_execution({connectionId: "warehouse", sql: "SELECT count(*) FROM public.orders o JOIN public.customers c ON c.id = o.customer_id LIMIT 20"})` with the target warehouse connection id and dialect-correct table names to verify the join key exists in both tables and assess cardinality before declaring the join.
7. Cross-reference knowledge: author the edge once on the **wiki** side via `sl_refs: [source_name]` in the page's front-matter. The reverse edge (wiki pages that cite an SL source) is derived automatically by the reconciler - do not add a `knowledge_refs:` field to SL YAMLs.
8. `sl_validate` - run after writing or editing to surface schema issues, duplicate measure names, and cross-source validation errors. Read-only; the writes are already committed (the squash-at-end flow will collapse them into one commit).
@ -315,7 +315,7 @@ Prior turn: user asked to correlate LTV with protocol count; assistant joined `f
sl_read_source({ connectionId: "warehouse", sourceName: "fct_orders" })
→ no joins section yet
sql_execution({
connectionName: "warehouse",
connectionId: "warehouse",
sql: "SELECT COUNT(*), COUNT(DISTINCT a.admin_user_id) FROM public.fct_orders a JOIN public.fct_mau_multiprotocol b ON a.admin_user_id = b.admin_user_id LIMIT 1"
})
→ confirms cardinality (many orders per MAU row = many_to_one)

View file

@ -60,16 +60,16 @@ Before writing a wiki page or SL source on any topic:
Before emitting any `schema.table` or `schema.table.column` into a wiki body,
SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`:
2. `entity_details({connectionName, targets: [{display: "<identifier>"}]})` -
2. `entity_details({connectionId, targets: [{display: "<identifier>"}]})` -
confirm the identifier resolves; inspect native types, FK/PK, and
sampleValues.
3. For literal values from the source, such as status codes or plan tiers,
check whether they appear in `entity_details` sampleValues for the relevant
column. If sampleValues is short or the sample may have missed real values,
run a `sql_execution` probe with the same warehouse connection name:
`sql_execution({connectionName, sql: "SELECT DISTINCT <col> FROM <ref> LIMIT 50"})`.
run a `sql_execution` probe with the same warehouse connection id:
`sql_execution({connectionId, sql: "SELECT DISTINCT <col> FROM <ref> LIMIT 50"})`.
4. If the candidate identifier still does not resolve, do one of:
- Use `sql_execution({connectionName, sql: "SELECT 1 FROM <ref> LIMIT 0"})`.
- Use `sql_execution({connectionId, sql: "SELECT 1 FROM <ref> LIMIT 0"})`.
If it errors, the identifier is fictional.
- Wrap the identifier in `[unverified - from <rawPath>]` in the wiki body,
citing the exact raw path that mentioned it.

View file

@ -18,6 +18,9 @@ const sqlAnalysis: SqlAnalysisPort = {
async analyzeBatch() {
return new Map();
},
async validateReadOnly() {
return { ok: true };
},
};
const reader: HistoricSqlReader = {
@ -79,6 +82,9 @@ describe('HistoricSqlSourceAdapter', () => {
],
]);
},
async validateReadOnly() {
return { ok: true };
},
};
const adapter = new HistoricSqlSourceAdapter({
sqlAnalysis: batchSqlAnalysis,

View file

@ -159,6 +159,7 @@ function acceptanceSqlAnalysis(): SqlAnalysisPort {
);
},
),
validateReadOnly: vi.fn(async () => ({ ok: true })),
};
}

View file

@ -83,6 +83,7 @@ describe('stageHistoricSqlAggregatedSnapshot', () => {
],
['bad-parse', { tablesTouched: [], columnsByClause: {}, error: 'parse failed' }],
])),
validateReadOnly: vi.fn(async () => ({ ok: true })),
};
await stageHistoricSqlAggregatedSnapshot({
@ -207,6 +208,7 @@ describe('stageHistoricSqlAggregatedSnapshot', () => {
},
],
])),
validateReadOnly: vi.fn(async () => ({ ok: true })),
};
await stageHistoricSqlAggregatedSnapshot({
@ -283,6 +285,7 @@ describe('stageHistoricSqlAggregatedSnapshot', () => {
},
],
])),
validateReadOnly: vi.fn(async () => ({ ok: true })),
};
await stageHistoricSqlAggregatedSnapshot({
@ -403,6 +406,7 @@ describe('stageHistoricSqlAggregatedSnapshot', () => {
},
],
])),
validateReadOnly: vi.fn(async () => ({ ok: true })),
};
await stageHistoricSqlAggregatedSnapshot({

View file

@ -94,11 +94,15 @@ describe('ingest runtime assets', () => {
it('packages identifier verification prompt assets', async () => {
const shared = await readFile(join(skillsDir, '_shared', 'identifier-verification.md'), 'utf-8');
const legacyConnectionPrefix = ['connection', 'Name'].join('');
expect(shared).toContain('## Identifier Verification Protocol');
expect(shared).toContain('discover_data');
expect(shared).toContain('entity_details');
expect(shared).toContain('sql_execution');
expect(shared).toContain('sql_execution({connectionName, sql: "SELECT DISTINCT');
expect(shared).toContain('sql_execution({connectionName, sql: "SELECT 1 FROM');
expect(shared).toContain('sql_execution({connectionId, sql: "SELECT DISTINCT');
expect(shared).toContain('sql_execution({connectionId, sql: "SELECT 1 FROM');
expect(shared).not.toContain(`entity_details({${legacyConnectionPrefix}`);
expect(shared).not.toContain(`sql_execution({${legacyConnectionPrefix}`);
});
});

View file

@ -97,6 +97,9 @@ describe('local ingest adapters', () => {
async analyzeBatch() {
return new Map();
},
async validateReadOnly() {
return { ok: true };
},
};
const adapters = createDefaultLocalIngestAdapters(project, {
historicSql: {
@ -140,6 +143,9 @@ describe('local ingest adapters', () => {
async analyzeBatch() {
return new Map();
},
async validateReadOnly() {
return { ok: true };
},
},
reader,
queryClient,
@ -166,6 +172,9 @@ describe('local ingest adapters', () => {
async analyzeBatch() {
return new Map();
},
async validateReadOnly() {
return { ok: true };
},
},
postgresQueryClient: {
async executeQuery() {
@ -258,6 +267,9 @@ describe('local ingest adapters', () => {
async analyzeBatch() {
return new Map();
},
async validateReadOnly() {
return { ok: true };
},
},
postgresQueryClient: {
async executeQuery() {

View file

@ -1,7 +1,7 @@
import { beforeEach, describe, expect, it, vi } from 'vitest';
import type { WarehouseCatalogService } from '../../../scan/warehouse-catalog.js';
import type { BaseTool, ToolContext } from '../../../tools/index.js';
import { DiscoverDataTool } from './discover-data.tool.js';
import type { WarehouseCatalogService } from './warehouse-catalog.service.js';
describe('DiscoverDataTool', () => {
const wikiSearchTool = { call: vi.fn() } as unknown as BaseTool & { call: ReturnType<typeof vi.fn> };
@ -36,7 +36,7 @@ describe('DiscoverDataTool', () => {
catalog.searchByName.mockResolvedValue([
{
kind: 'table',
connectionName: 'warehouse',
connectionId: 'warehouse',
ref: { catalog: null, db: 'public', name: 'orders' },
display: 'public.orders',
matchedOn: 'name',
@ -45,28 +45,28 @@ describe('DiscoverDataTool', () => {
});
it('groups wiki, semantic layer, and raw schema hits with routing hints', async () => {
const result = await tool.call({ query: 'orders', connectionName: 'warehouse', limit: 5 }, context);
const result = await tool.call({ query: 'orders', connectionId: 'warehouse', limit: 5 }, context);
expect(result.markdown).toContain('## Wiki Pages');
expect(result.markdown).toContain('use `wiki_read(blockKey)` for full content');
expect(result.markdown).toContain('## Semantic Layer Sources');
expect(result.markdown).toContain('use `sl_read_source(sourceName)` for the YAML');
expect(result.markdown).toContain('## Raw Warehouse Schema');
expect(result.markdown).toContain('use `entity_details({connectionName, targets: [{display}]})`');
expect(result.markdown).toContain('use `entity_details({connectionId, targets: [{display}]})`');
expect(result.structured.raw?.hits).toHaveLength(1);
});
it('includes connectionName on raw schema hits so entity_details can follow up', async () => {
it('includes connectionId on raw schema hits so entity_details can follow up', async () => {
const multiConnectionContext: ToolContext = {
...context,
session: { allowedConnectionNames: new Set(['warehouse', 'analytics']) } as any,
};
catalog.searchByName.mockImplementation(async (connectionName: string, query: string) => [
catalog.searchByName.mockImplementation(async (connectionId: string, query: string) => [
{
kind: 'table',
connectionName,
ref: { catalog: null, db: 'public', name: `${connectionName}_${query}` },
display: `public.${connectionName}_${query}`,
connectionId,
ref: { catalog: null, db: 'public', name: `${connectionId}_${query}` },
display: `public.${connectionId}_${query}`,
matchedOn: 'name',
},
]);
@ -75,16 +75,16 @@ describe('DiscoverDataTool', () => {
expect(catalog.searchByName).toHaveBeenCalledWith('analytics', 'orders', 10);
expect(catalog.searchByName).toHaveBeenCalledWith('warehouse', 'orders', 10);
expect(result.markdown).toContain('connectionName=analytics');
expect(result.markdown).toContain('connectionName=warehouse');
expect(result.markdown).toContain('connectionId=analytics');
expect(result.markdown).toContain('connectionId=warehouse');
expect(result.markdown).toContain(
'entity_details({connectionName: "analytics", targets: [{display: "public.analytics_orders"}]})',
'entity_details({connectionId: "analytics", targets: [{display: "public.analytics_orders"}]})',
);
expect(result.structured.raw?.hits.map((hit) => hit.connectionName)).toEqual(['analytics', 'warehouse']);
expect(result.structured.raw?.hits.map((hit) => hit.connectionId)).toEqual(['analytics', 'warehouse']);
});
it('refuses explicit out-of-scope connection names', async () => {
const result = await tool.call({ query: 'orders', connectionName: 'billing' }, context);
const result = await tool.call({ query: 'orders', connectionId: 'billing' }, context);
expect(result.markdown).toContain('Connection "billing" is not available to this ingest stage.');
expect(result.structured).toEqual({ wiki: null, sl: null, raw: null });
@ -99,7 +99,7 @@ describe('DiscoverDataTool', () => {
structured: { sourceName: 'orders' },
});
const result = await tool.call({ sourceName: 'orders', connectionName: 'warehouse' }, context);
const result = await tool.call({ sourceName: 'orders', connectionId: 'warehouse' }, context);
expect(slDiscoverTool.call).toHaveBeenCalledWith({ sourceName: 'orders', connectionId: 'warehouse' }, context);
expect(wikiSearchTool.call).not.toHaveBeenCalled();
@ -112,8 +112,20 @@ describe('DiscoverDataTool', () => {
slDiscoverTool.call.mockResolvedValueOnce({ markdown: '', structured: { totalSources: 0, sources: [] } });
catalog.searchByName.mockResolvedValueOnce([]);
const result = await tool.call({ query: 'customer source', connectionName: 'warehouse' }, context);
const result = await tool.call({ query: 'customer source', connectionId: 'warehouse' }, context);
expect(result.markdown).toContain('No matches for "customer source" across wiki, semantic layer, or raw warehouse schema.');
});
it('uses connectionId as the optional connection filter', () => {
const legacyConnectionField = ['connection', 'Name'].join('');
expect(tool.parseInput({ query: 'orders', connectionId: 'warehouse', limit: 5 })).toEqual({
query: 'orders',
connectionId: 'warehouse',
limit: 5,
});
expect(() => tool.parseInput({ query: 'orders', [legacyConnectionField]: 'warehouse', limit: 5 })).toThrow();
});
});

View file

@ -1,13 +1,13 @@
import { z } from 'zod';
import { WarehouseCatalogService, type RawSchemaHit } from '../../../scan/warehouse-catalog.js';
import { BaseTool, type ToolContext, type ToolOutput } from '../../../tools/index.js';
import { WarehouseCatalogService, type RawSchemaHit } from './warehouse-catalog.service.js';
const discoverDataInputSchema = z.object({
query: z.string().optional(),
connectionName: z.string().regex(/^[a-zA-Z0-9][a-zA-Z0-9_-]*$/).optional(),
connectionId: z.string().regex(/^[a-zA-Z0-9][a-zA-Z0-9_-]*$/).optional(),
limit: z.number().int().positive().max(50).optional().default(10),
sourceName: z.string().optional(),
});
}).strict();
type DiscoverDataInput = z.input<typeof discoverDataInputSchema>;
@ -62,16 +62,16 @@ export class DiscoverDataTool extends BaseTool<typeof discoverDataInputSchema> {
async call(input: DiscoverDataInput, context: ToolContext): Promise<ToolOutput<DiscoverDataStructured>> {
const allowed = allowedConnectionNames(context);
if (input.connectionName && allowed && !allowed.has(input.connectionName)) {
if (input.connectionId && allowed && !allowed.has(input.connectionId)) {
return {
markdown: `Connection "${input.connectionName}" is not available to this ingest stage.`,
markdown: `Connection "${input.connectionId}" is not available to this ingest stage.`,
structured: { wiki: null, sl: null, raw: null },
};
}
if (input.sourceName) {
const sl = await this.deps.slDiscoverTool.call(
{ sourceName: input.sourceName, connectionId: input.connectionName },
{ sourceName: input.sourceName, connectionId: input.connectionId },
context,
);
return { markdown: sl.markdown, structured: { wiki: null, sl: sl.structured, raw: null } };
@ -93,7 +93,7 @@ export class DiscoverDataTool extends BaseTool<typeof discoverDataInputSchema> {
}
const slResult = await this.deps.slDiscoverTool.call(
{ query: query || undefined, connectionId: input.connectionName },
{ query: query || undefined, connectionId: input.connectionId },
context,
);
if (totalSources(slResult.structured) > 0) {
@ -107,23 +107,23 @@ export class DiscoverDataTool extends BaseTool<typeof discoverDataInputSchema> {
}
const catalog = this.deps.catalogFactory(context);
const connections = input.connectionName ? [input.connectionName] : [...(allowed ?? [])].sort();
const connections = input.connectionId ? [input.connectionId] : [...(allowed ?? [])].sort();
const rawHits: RawSchemaHit[] = [];
for (const connectionName of connections) {
rawHits.push(...(await catalog.searchByName(connectionName, query, limit)));
for (const connectionId of connections) {
rawHits.push(...(await catalog.searchByName(connectionId, query, limit)));
}
if (rawHits.length > 0) {
parts.push(
'## Raw Warehouse Schema',
'> use `entity_details({connectionName, targets: [{display}]})` for full DDL + sample values',
'> use `entity_details({connectionId, targets: [{display}]})` for full DDL + sample values',
);
parts.push(
rawHits
.slice(0, limit)
.map(
(hit) =>
`- ${hit.kind}: ${hit.display} [connectionName=${hit.connectionName}] (matched on ${hit.matchedOn}) - ` +
`follow up with \`entity_details({connectionName: "${hit.connectionName}", targets: [{display: "${hit.display}"}]})\``,
`- ${hit.kind}: ${hit.display} [connectionId=${hit.connectionId}] (matched on ${hit.matchedOn}) - ` +
`follow up with \`entity_details({connectionId: "${hit.connectionId}", targets: [{display: "${hit.display}"}]})\``,
)
.join('\n'),
);

View file

@ -3,9 +3,9 @@ import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { initKtxProject, type KtxLocalProject } from '../../../project/index.js';
import { WarehouseCatalogService } from '../../../scan/warehouse-catalog.js';
import type { ToolContext } from '../../../tools/index.js';
import { EntityDetailsTool } from './entity-details.tool.js';
import { WarehouseCatalogService } from './warehouse-catalog.service.js';
describe('EntityDetailsTool', () => {
let tempDir: string;
@ -32,11 +32,11 @@ describe('EntityDetailsTool', () => {
await rm(tempDir, { recursive: true, force: true });
});
async function seedLiveDatabaseScan(connectionName = 'warehouse', syncId = 'sync-1') {
const root = `raw-sources/${connectionName}/live-database/${syncId}`;
async function seedLiveDatabaseScan(connectionId = 'warehouse', syncId = 'sync-1') {
const root = `raw-sources/${connectionId}/live-database/${syncId}`;
await project.fileStore.writeFile(
`${root}/connection.json`,
JSON.stringify({ connectionId: connectionName, driver: 'postgres', extractedAt: '2026-05-12T00:00:00.000Z' }, null, 2),
JSON.stringify({ connectionId, driver: 'postgres', extractedAt: '2026-05-12T00:00:00.000Z' }, null, 2),
'ktx',
'ktx@example.com',
'seed connection',
@ -84,7 +84,7 @@ describe('EntityDetailsTool', () => {
`${root}/enrichment/relationship-profile.json`,
JSON.stringify(
{
connectionId: connectionName,
connectionId,
driver: 'postgres',
tables: [{ table: { catalog: null, db: 'public', name: 'orders' }, rowCount: 12 }],
columns: {
@ -109,7 +109,7 @@ describe('EntityDetailsTool', () => {
}
it('returns scoped table detail for a display target', async () => {
const result = await tool.call({ connectionName: 'warehouse', targets: [{ display: 'public.orders' }] }, context);
const result = await tool.call({ connectionId: 'warehouse', targets: [{ display: 'public.orders' }] }, context);
expect(result.markdown).toContain('### public.orders');
expect(result.markdown).toContain('- status (text, nullable=false)');
@ -120,7 +120,7 @@ describe('EntityDetailsTool', () => {
it('resolves display targets that include a column name', async () => {
const result = await tool.call(
{ connectionName: 'warehouse', targets: [{ display: 'public.orders.status' }] },
{ connectionId: 'warehouse', targets: [{ display: 'public.orders.status' }] },
context,
);
@ -133,7 +133,7 @@ describe('EntityDetailsTool', () => {
it('reports missing explicit columns instead of returning an empty column list', async () => {
const result = await tool.call(
{ connectionName: 'warehouse', targets: [{ display: 'public.orders.plan_tier' }] },
{ connectionId: 'warehouse', targets: [{ display: 'public.orders.plan_tier' }] },
context,
);
@ -146,7 +146,7 @@ describe('EntityDetailsTool', () => {
it('reports missing structured table targets in model-visible markdown', async () => {
const result = await tool.call(
{
connectionName: 'warehouse',
connectionId: 'warehouse',
targets: [{ catalog: null, db: 'public', name: 'orderz' }],
},
context,
@ -161,7 +161,7 @@ describe('EntityDetailsTool', () => {
it('reports missing structured column targets in model-visible markdown', async () => {
const result = await tool.call(
{
connectionName: 'warehouse',
connectionId: 'warehouse',
targets: [{ catalog: null, db: 'public', name: 'orders', column: 'plan_tier' }],
},
context,
@ -175,7 +175,7 @@ describe('EntityDetailsTool', () => {
it('returns a no-scan state distinct from not found', async () => {
const result = await tool.call(
{ connectionName: 'empty', targets: [{ display: 'public.orders' }] },
{ connectionId: 'empty', targets: [{ display: 'public.orders' }] },
{ ...context, session: { ...context.session!, allowedConnectionNames: new Set(['empty']) } },
);
@ -184,9 +184,30 @@ describe('EntityDetailsTool', () => {
});
it('refuses out-of-scope connections', async () => {
const result = await tool.call({ connectionName: 'billing', targets: [{ display: 'public.orders' }] }, context);
const result = await tool.call({ connectionId: 'billing', targets: [{ display: 'public.orders' }] }, context);
expect(result.markdown).toContain('Connection "billing" is not available to this ingest stage.');
expect(result.structured.scanAvailable).toBe(false);
});
it('uses connectionId as the public input field', async () => {
const legacyConnectionField = ['connection', 'Name'].join('');
expect(
tool.parseInput({
connectionId: 'warehouse',
targets: [{ display: 'public.orders' }],
}),
).toEqual({
connectionId: 'warehouse',
targets: [{ display: 'public.orders' }],
});
expect(() =>
tool.parseInput({
[legacyConnectionField]: 'warehouse',
targets: [{ display: 'public.orders' }],
}),
).toThrow();
});
});

View file

@ -1,7 +1,7 @@
import { z } from 'zod';
import type { KtxTableRef } from '../../../scan/types.js';
import { WarehouseCatalogService, type TableDetail } from '../../../scan/warehouse-catalog.js';
import { BaseTool, type ToolContext, type ToolOutput } from '../../../tools/index.js';
import { WarehouseCatalogService, type TableDetail } from './warehouse-catalog.service.js';
const targetSchema = z.union([
z.object({ display: z.string().min(1) }),
@ -14,9 +14,9 @@ const targetSchema = z.union([
]);
const entityDetailsInputSchema = z.object({
connectionName: z.string().regex(/^[a-zA-Z0-9][a-zA-Z0-9_-]*$/),
connectionId: z.string().regex(/^[a-zA-Z0-9][a-zA-Z0-9_-]*$/),
targets: z.array(targetSchema).min(1).max(50),
});
}).strict();
type EntityDetailsInput = z.infer<typeof entityDetailsInputSchema>;
type EntityDetailsTarget = EntityDetailsInput['targets'][number];
@ -47,14 +47,14 @@ function appendMissingTargetMarkdown(parts: string[], target: EntityDetailsTarge
async function resolveTarget(
catalog: WarehouseCatalogService,
connectionName: string,
connectionId: string,
target: EntityDetailsTarget,
): Promise<{ resolved: (KtxTableRef & { column?: string }) | null; candidates: KtxTableRef[] }> {
if ('display' in target) {
return catalog.resolveDisplayTarget(connectionName, target.display);
return catalog.resolveDisplayTarget(connectionId, target.display);
}
const candidateResolution = await catalog.resolveDisplayTarget(connectionName, targetLabel(target));
const candidateResolution = await catalog.resolveDisplayTarget(connectionId, targetLabel(target));
return {
resolved: {
catalog: target.catalog,
@ -107,18 +107,18 @@ export class EntityDetailsTool extends BaseTool<typeof entityDetailsInputSchema>
async call(input: EntityDetailsInput, context: ToolContext): Promise<ToolOutput<EntityDetailsStructured>> {
const allowed = allowedConnectionNames(context);
if (allowed && !allowed.has(input.connectionName)) {
if (allowed && !allowed.has(input.connectionId)) {
return {
markdown: `Connection "${input.connectionName}" is not available to this ingest stage.`,
markdown: `Connection "${input.connectionId}" is not available to this ingest stage.`,
structured: { resolved: [], missing: [], scanAvailable: false },
};
}
const catalog = this.catalogFactory(context);
const scanAvailable = await catalog.hasScan(input.connectionName);
const scanAvailable = await catalog.hasScan(input.connectionId);
if (!scanAvailable) {
return {
markdown: `No live-database scan available for connection "${input.connectionName}"; run \`ktx scan\` first.`,
markdown: `No live-database scan available for connection "${input.connectionId}"; run \`ktx scan\` first.`,
structured: { resolved: [], missing: [], scanAvailable: false },
};
}
@ -128,13 +128,13 @@ export class EntityDetailsTool extends BaseTool<typeof entityDetailsInputSchema>
const missing: EntityDetailsStructured['missing'] = [];
for (const target of input.targets) {
const resolution = await resolveTarget(catalog, input.connectionName, target);
const resolution = await resolveTarget(catalog, input.connectionId, target);
if (!resolution.resolved) {
missing.push({ target, candidates: resolution.candidates });
appendMissingTargetMarkdown(parts, target, resolution.candidates);
continue;
}
const detail = await catalog.getTable({ connectionName: input.connectionName, ...resolution.resolved });
const detail = await catalog.getTable({ connectionId: input.connectionId, ...resolution.resolved });
if (!detail) {
missing.push({ target, candidates: resolution.candidates });
appendMissingTargetMarkdown(parts, target, resolution.candidates);

View file

@ -1,10 +1,10 @@
import type { KtxFileStorePort } from '../../../core/index.js';
import type { SlConnectionCatalogPort } from '../../../sl/index.js';
import { WarehouseCatalogService } from '../../../scan/warehouse-catalog.js';
import type { BaseTool, ToolContext } from '../../../tools/index.js';
import { DiscoverDataTool } from './discover-data.tool.js';
import { EntityDetailsTool } from './entity-details.tool.js';
import { SqlExecutionTool } from './sql-execution.tool.js';
import { WarehouseCatalogService } from './warehouse-catalog.service.js';
export function createWarehouseVerificationTools(deps: {
connections: SlConnectionCatalogPort;

View file

@ -19,7 +19,7 @@ describe('SqlExecutionTool', () => {
connections.executeQuery.mockResolvedValue({ headers: ['status'], rows: [['paid']], totalRows: 1 });
const result = await tool.call(
{ connectionName: 'warehouse', sql: 'select status from public.orders', rowLimit: 5 },
{ connectionId: 'warehouse', sql: 'select status from public.orders', rowLimit: 5 },
context,
);
@ -34,7 +34,7 @@ describe('SqlExecutionTool', () => {
it.each(['insert into x values (1)', 'drop table x', 'vacuum'])('rejects mutating SQL: %s', async (sql) => {
connections.executeQuery.mockClear();
const result = await tool.call({ connectionName: 'warehouse', sql }, context);
const result = await tool.call({ connectionId: 'warehouse', sql }, context);
expect(result.markdown).toContain('Only read-only SELECT/WITH queries can be executed locally.');
expect(connections.executeQuery).not.toHaveBeenCalled();
@ -44,11 +44,35 @@ describe('SqlExecutionTool', () => {
connections.executeQuery.mockRejectedValue(new Error('relation "orbit_analytics.customer" does not exist'));
const result = await tool.call(
{ connectionName: 'warehouse', sql: 'select 1 from orbit_analytics.customer', rowLimit: 1 },
{ connectionId: 'warehouse', sql: 'select 1 from orbit_analytics.customer', rowLimit: 1 },
context,
);
expect(result.markdown).toContain('relation "orbit_analytics.customer" does not exist');
expect(result.structured.error).toContain('relation "orbit_analytics.customer" does not exist');
});
it('uses connectionId as the public input field', () => {
const legacyConnectionField = ['connection', 'Name'].join('');
expect(
tool.parseInput({
connectionId: 'warehouse',
sql: 'select 1',
rowLimit: 5,
}),
).toEqual({
connectionId: 'warehouse',
sql: 'select 1',
rowLimit: 5,
});
expect(() =>
tool.parseInput({
[legacyConnectionField]: 'warehouse',
sql: 'select 1',
rowLimit: 5,
}),
).toThrow();
});
});

View file

@ -4,10 +4,10 @@ import type { SlConnectionCatalogPort } from '../../../sl/index.js';
import { BaseTool, type ToolContext, type ToolOutput } from '../../../tools/index.js';
const sqlExecutionInputSchema = z.object({
connectionName: z.string().regex(/^[a-zA-Z0-9][a-zA-Z0-9_-]*$/),
connectionId: z.string().regex(/^[a-zA-Z0-9][a-zA-Z0-9_-]*$/),
sql: z.string().min(1),
rowLimit: z.number().int().positive().max(1000).optional().default(100),
});
}).strict();
type SqlExecutionInput = z.input<typeof sqlExecutionInputSchema>;
@ -54,9 +54,9 @@ export class SqlExecutionTool extends BaseTool<typeof sqlExecutionInputSchema> {
async call(input: SqlExecutionInput, context: ToolContext): Promise<ToolOutput<SqlExecutionStructured>> {
const allowed = context.session?.allowedConnectionNames;
if (allowed && !allowed.has(input.connectionName)) {
if (allowed && !allowed.has(input.connectionId)) {
return {
markdown: `Connection "${input.connectionName}" is not available to this ingest stage.`,
markdown: `Connection "${input.connectionId}" is not available to this ingest stage.`,
structured: {
headers: [],
rows: [],
@ -83,7 +83,7 @@ export class SqlExecutionTool extends BaseTool<typeof sqlExecutionInputSchema> {
}
try {
const result = await this.connections.executeQuery(input.connectionName, wrappedSql);
const result = await this.connections.executeQuery(input.connectionId, wrappedSql);
const headers = result.headers ?? [];
const rows = result.rows ?? [];
const rowCount = result.totalRows ?? rows.length;

View file

@ -143,6 +143,45 @@ const scanArtifactReadSchema = z.object({
path: z.string().min(1),
});
const entityDetailsTableRefSchema = z.object({
catalog: z.string().nullable(),
db: z.string().nullable(),
name: z.string().min(1),
});
const entityDetailsSchema = z.object({
connectionId: connectionIdSchema,
entities: z
.array(
z.object({
table: z.union([z.string().min(1), entityDetailsTableRefSchema]),
columns: z.array(z.string().min(1)).optional(),
}),
)
.min(1)
.max(20),
});
const dictionarySearchSchema = z.object({
values: z.array(z.string().min(1)).min(1).max(20),
connectionId: connectionIdSchema.optional(),
});
const discoverDataKindSchema = z.enum(['wiki', 'sl_source', 'sl_measure', 'sl_dimension', 'table', 'column']);
const discoverDataSchema = z.object({
query: z.string().min(1),
connectionId: connectionIdSchema.optional(),
kinds: z.array(discoverDataKindSchema).optional(),
limit: z.number().int().min(1).max(50).default(15).optional(),
});
const sqlExecutionSchema = z.object({
connectionId: connectionIdSchema,
sql: z.string().min(1),
maxRows: z.number().int().min(1).max(10_000).default(1000).optional(),
});
export function jsonToolResult<T extends object>(structuredContent: T): KtxMcpToolResult<T> {
return {
content: [{ type: 'text', text: JSON.stringify(structuredContent, null, 2) }],
@ -361,6 +400,81 @@ export function registerKtxContextTools(deps: RegisterKtxContextToolsDeps): void
);
}
if (ports.entityDetails) {
const entityDetails = ports.entityDetails;
registerParsedTool(
server,
'entity_details',
{
title: 'Entity Details',
description: 'Read raw table and column metadata from the latest KTX live-database scan snapshot.',
inputSchema: entityDetailsSchema.shape,
},
entityDetailsSchema,
async (input) => jsonToolResult(await entityDetails.read(input)),
);
}
if (ports.dictionarySearch) {
const dictionarySearch = ports.dictionarySearch;
registerParsedTool(
server,
'dictionary_search',
{
title: 'Dictionary Search',
description:
'Search profile-sampled warehouse values and report matching connection/source/column locations plus non-authoritative miss reasons.',
inputSchema: dictionarySearchSchema.shape,
},
dictionarySearchSchema,
async (input) => jsonToolResult(await dictionarySearch.search(input)),
);
}
if (ports.discover) {
const discover = ports.discover;
registerParsedTool(
server,
'discover_data',
{
title: 'Discover Data',
description:
'Search across KTX wiki pages, semantic-layer sources/measures/dimensions, and raw warehouse schema refs.',
inputSchema: discoverDataSchema.shape,
},
discoverDataSchema,
async (input) => jsonToolResult(await discover.search(input)),
);
}
if (ports.sqlExecution) {
const sqlExecution = ports.sqlExecution;
registerParsedTool(
server,
'sql_execution',
{
title: 'SQL Execution',
description:
'Execute one parser-validated read-only SQL query against a configured KTX connection and return structured rows.',
inputSchema: sqlExecutionSchema.shape,
},
sqlExecutionSchema,
async (input) => {
try {
return jsonToolResult(
await sqlExecution.execute({
connectionId: input.connectionId,
sql: input.sql,
maxRows: input.maxRows ?? 1000,
}),
);
} catch (error) {
return jsonErrorToolResult(error instanceof Error ? error.message : String(error));
}
},
);
}
if (ports.ingest) {
const ingest = ports.ingest;
registerParsedTool(

View file

@ -5,6 +5,9 @@ export { createDefaultKtxMcpServer, createKtxMcpServer } from './server.js';
export type {
KtxConnectionSummary,
KtxConnectionsMcpPort,
KtxDiscoverDataMcpPort,
KtxDictionarySearchMcpPort,
KtxEntityDetailsMcpPort,
KtxIngestDiffSummary,
KtxIngestMcpPort,
KtxIngestStatusResponse,

View file

@ -5,7 +5,12 @@ import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import { AgentRunnerService } from '../agent/index.js';
import { FakeSourceAdapter, type MemoryFlowReplayInput } from '../ingest/index.js';
import { initKtxProject } from '../project/index.js';
import { createKtxConnectorCapabilities, type KtxScanConnector, type KtxSchemaSnapshot } from '../scan/index.js';
import {
createKtxConnectorCapabilities,
type KtxQueryResult,
type KtxScanConnector,
type KtxSchemaSnapshot,
} from '../scan/index.js';
import { writeLocalSlSource } from '../sl/index.js';
import { createLocalProjectMcpContextPorts } from './local-project-ports.js';
@ -60,16 +65,119 @@ describe('createLocalProjectMcpContextPorts', () => {
};
}
function testConnector(snapshot = testSnapshot()): KtxScanConnector {
function testConnector(snapshot = testSnapshot(), queryResult?: KtxQueryResult): KtxScanConnector {
return {
id: `test:${snapshot.connectionId}`,
driver: snapshot.driver,
capabilities: createKtxConnectorCapabilities(),
capabilities: createKtxConnectorCapabilities({ readOnlySql: queryResult !== undefined }),
introspect: vi.fn(async () => snapshot),
executeReadOnly: queryResult === undefined ? undefined : vi.fn(async () => queryResult),
cleanup: vi.fn(async () => {}),
};
}
async function seedScanReport(projectDir: string, syncId = 'sync-1'): Promise<void> {
const root = `raw-sources/warehouse/live-database/${syncId}`;
await mkdir(join(projectDir, root, 'tables'), { recursive: true });
await writeFile(
join(projectDir, root, 'connection.json'),
JSON.stringify(
{
connectionId: 'warehouse',
driver: 'postgres',
extractedAt: '2026-05-14T09:00:00.000Z',
scope: { schemas: ['public'] },
},
null,
2,
),
'utf-8',
);
await writeFile(
join(projectDir, root, 'tables', 'orders.json'),
JSON.stringify(
{
catalog: null,
db: 'public',
name: 'orders',
kind: 'table',
comment: 'Customer orders',
estimatedRows: 12,
columns: [
{
name: 'id',
nativeType: 'integer',
normalizedType: 'integer',
dimensionType: 'number',
nullable: false,
primaryKey: true,
comment: null,
},
],
foreignKeys: [],
},
null,
2,
),
'utf-8',
);
await writeFile(
join(projectDir, root, 'scan-report.json'),
JSON.stringify(
{
connectionId: 'warehouse',
driver: 'postgres',
syncId,
runId: 'scan-1',
trigger: 'mcp',
mode: 'structural',
dryRun: false,
artifactPaths: {
rawSourcesDir: root,
reportPath: `${root}/scan-report.json`,
manifestShards: [],
enrichmentArtifacts: [],
},
diffSummary: {
tablesAdded: 0,
tablesModified: 0,
tablesDeleted: 0,
tablesUnchanged: 1,
columnsAdded: 0,
columnsModified: 0,
columnsDeleted: 0,
},
manifestShardsWritten: 0,
structuralSyncStats: {
tablesCreated: 1,
tablesUpdated: 0,
tablesDeleted: 0,
columnsCreated: 0,
columnsUpdated: 0,
columnsDeleted: 0,
},
enrichment: {
dataDictionary: 'skipped',
tableDescriptions: 'skipped',
columnDescriptions: 'skipped',
embeddings: 'skipped',
deterministicRelationships: 'skipped',
llmRelationshipValidation: 'skipped',
statisticalValidation: 'skipped',
},
capabilityGaps: [],
warnings: [],
relationships: { accepted: 0, review: 0, rejected: 0, skipped: 0 },
enrichmentState: { resumedStages: [], completedStages: [], failedStages: [] },
createdAt: '2026-05-14T09:00:00.000Z',
},
null,
2,
),
'utf-8',
);
}
it('lists local project connections from ktx.yaml', async () => {
const project = await initKtxProject({ projectDir: tempDir });
project.config.connections.warehouse = {
@ -119,6 +227,382 @@ describe('createLocalProjectMcpContextPorts', () => {
expect(connector.cleanup).toHaveBeenCalled();
});
it('executes MCP SQL only after parser-backed validation passes', async () => {
const project = await initKtxProject({ projectDir: tempDir });
project.config.connections.warehouse = {
driver: 'postgres',
url: 'env:DATABASE_URL',
};
const connector = testConnector(testSnapshot(), {
headers: ['id'],
headerTypes: ['integer'],
rows: [[1]],
totalRows: 1,
rowCount: 1,
});
const createConnector = vi.fn(async () => connector);
const sqlAnalysis = {
analyzeForFingerprint: vi.fn(),
analyzeBatch: vi.fn(),
validateReadOnly: vi.fn(async () => ({ ok: true, error: null })),
};
const ports = createLocalProjectMcpContextPorts(project, {
sqlAnalysis,
localScan: {
createConnector,
},
});
await expect(
ports.sqlExecution?.execute({
connectionId: 'warehouse',
sql: 'select id from public.orders',
maxRows: 5,
}),
).resolves.toEqual({
headers: ['id'],
headerTypes: ['integer'],
rows: [[1]],
rowCount: 1,
});
expect(sqlAnalysis.validateReadOnly).toHaveBeenCalledWith('select id from public.orders', 'postgres');
expect(createConnector).toHaveBeenCalledWith('warehouse');
expect(connector.executeReadOnly).toHaveBeenCalledWith(
{
connectionId: 'warehouse',
sql: 'select id from public.orders',
maxRows: 5,
},
{ runId: 'mcp-sql-execution' },
);
expect(connector.cleanup).toHaveBeenCalled();
});
it('rejects MCP SQL before connector execution when parser validation fails', async () => {
const project = await initKtxProject({ projectDir: tempDir });
project.config.connections.warehouse = {
driver: 'postgres',
url: 'env:DATABASE_URL',
};
const connector = testConnector(testSnapshot(), {
headers: ['id'],
rows: [[1]],
totalRows: 1,
rowCount: 1,
});
const sqlAnalysis = {
analyzeForFingerprint: vi.fn(),
analyzeBatch: vi.fn(),
validateReadOnly: vi.fn(async () => ({
ok: false,
error: 'SQL contains read/write operation: Insert',
})),
};
const ports = createLocalProjectMcpContextPorts(project, {
sqlAnalysis,
localScan: {
createConnector: vi.fn(async () => connector),
},
});
await expect(
ports.sqlExecution?.execute({
connectionId: 'warehouse',
sql: 'with x as (insert into t values (1) returning *) select * from x',
maxRows: 1000,
}),
).rejects.toThrow('SQL contains read/write operation: Insert');
expect(connector.executeReadOnly).not.toHaveBeenCalled();
});
it('exposes local scan entity details through MCP ports', async () => {
const project = await initKtxProject({ projectDir: tempDir });
project.config.connections.warehouse = {
driver: 'postgres',
url: 'env:DATABASE_URL',
};
await seedScanReport(project.projectDir);
const ports = createLocalProjectMcpContextPorts(project);
await expect(
ports.entityDetails?.read({
connectionId: 'warehouse',
entities: [{ table: 'public.orders', columns: ['id'] }],
}),
).resolves.toMatchObject({
results: [
{
ok: true,
connectionId: 'warehouse',
display: 'public.orders',
columns: [{ name: 'id', nativeType: 'integer' }],
snapshot: { syncId: 'sync-1', scanRunId: 'scan-1' },
},
],
});
});
it('returns a structured local entity details error when no scan exists', async () => {
const project = await initKtxProject({ projectDir: tempDir });
project.config.connections.warehouse = {
driver: 'postgres',
url: 'env:DATABASE_URL',
};
const ports = createLocalProjectMcpContextPorts(project);
await expect(
ports.entityDetails?.read({
connectionId: 'warehouse',
entities: [{ table: 'public.orders' }],
}),
).resolves.toMatchObject({
results: [
{
ok: false,
connectionId: 'warehouse',
error: { code: 'scan_missing' },
},
],
});
});
it('exposes local dictionary search through MCP ports', async () => {
const project = await initKtxProject({ projectDir: tempDir });
project.config.connections.warehouse = {
driver: 'postgres',
url: 'env:DATABASE_URL',
};
await project.fileStore.writeFile(
'raw-sources/warehouse/live-database/sync-1/enrichment/relationship-profile.json',
`${JSON.stringify(
{
connectionId: 'warehouse',
driver: 'postgres',
sqlAvailable: true,
queryCount: 4,
tables: [],
columns: {
'orders.status': {
table: { catalog: null, db: 'public', name: 'orders' },
column: 'status',
nativeType: 'text',
normalizedType: 'string',
distinctCount: 2,
sampleValues: ['paid', 'refunded'],
},
},
warnings: [],
},
null,
2,
)}\n`,
'ktx',
'ktx@example.com',
'Seed dictionary profile',
);
const ports = createLocalProjectMcpContextPorts(project);
await expect(ports.dictionarySearch?.search({ values: ['paid'] })).resolves.toMatchObject({
searched: [{ connectionId: 'warehouse', status: 'ready' }],
results: [
{
value: 'paid',
matches: [{ connectionId: 'warehouse', sourceName: 'orders', columnName: 'status', matchedValue: 'paid' }],
misses: [],
},
],
});
});
it('reports missing local dictionary profiles through MCP ports', async () => {
const project = await initKtxProject({ projectDir: tempDir });
project.config.connections.warehouse = {
driver: 'postgres',
url: 'env:DATABASE_URL',
};
const ports = createLocalProjectMcpContextPorts(project);
await expect(ports.dictionarySearch?.search({ values: ['paid'] })).resolves.toEqual({
searched: [
{
connectionId: 'warehouse',
coverage: {
sampledRows: null,
valuesPerColumn: null,
profiledColumns: 0,
syncId: null,
profiledAt: null,
},
status: 'no_profile_artifact',
},
],
results: [
{
value: 'paid',
matches: [],
misses: [{ connectionId: 'warehouse', reason: 'no_profile_artifact' }],
},
],
});
});
it('exposes local project discover_data across wiki, semantic-layer, and raw schema', async () => {
const project = await initKtxProject({ projectDir: tempDir });
project.config.connections.warehouse = {
driver: 'postgres',
url: 'env:DATABASE_URL',
};
await project.fileStore.writeFile(
'wiki/global/orders-playbook.md',
[
'---',
'summary: Paid order operations',
'tags: [orders]',
'refs: []',
'sl_refs: []',
'usage_mode: auto',
'---',
'',
'Paid orders are used for customer activity analysis.',
'',
].join('\n'),
'ktx',
'ktx@example.com',
'seed wiki',
);
await project.fileStore.writeFile(
'semantic-layer/warehouse/orders.yaml',
[
'name: orders',
'descriptions:',
' user: Paid order facts',
'table: public.orders',
'grain: [id]',
'columns:',
' - name: status',
' type: string',
' descriptions:',
' user: Payment status',
'measures:',
' - name: order_count',
' expr: count(*)',
' description: Number of paid orders',
'',
].join('\n'),
'ktx',
'ktx@example.com',
'seed sl',
);
await project.fileStore.writeFile(
'raw-sources/warehouse/live-database/sync-1/connection.json',
JSON.stringify({ connectionId: 'warehouse', driver: 'postgres', extractedAt: '2026-05-14T09:00:00.000Z' }, null, 2),
'ktx',
'ktx@example.com',
'seed connection',
);
await project.fileStore.writeFile(
'raw-sources/warehouse/live-database/sync-1/tables/public-orders.json',
JSON.stringify(
{
catalog: null,
db: 'public',
name: 'orders',
kind: 'table',
comment: 'Orders table',
estimatedRows: 10,
columns: [
{
name: 'status',
nativeType: 'text',
normalizedType: 'text',
dimensionType: 'string',
nullable: false,
primaryKey: false,
comment: 'Order status',
sampleValues: ['paid'],
},
],
foreignKeys: [],
},
null,
2,
),
'ktx',
'ktx@example.com',
'seed table',
);
await project.fileStore.writeFile(
'raw-sources/warehouse/live-database/sync-1/scan-report.json',
JSON.stringify(
{
connectionId: 'warehouse',
driver: 'postgres',
syncId: 'sync-1',
runId: 'scan-1',
trigger: 'mcp',
mode: 'enriched',
dryRun: false,
artifactPaths: {
rawSourcesDir: 'raw-sources/warehouse/live-database/sync-1',
reportPath: 'raw-sources/warehouse/live-database/sync-1/scan-report.json',
manifestShards: [],
enrichmentArtifacts: [],
},
diffSummary: {
tablesAdded: 1,
tablesModified: 0,
tablesDeleted: 0,
tablesUnchanged: 0,
columnsAdded: 0,
columnsModified: 0,
columnsDeleted: 0,
},
manifestShardsWritten: 0,
structuralSyncStats: {
tablesCreated: 0,
tablesUpdated: 0,
tablesDeleted: 0,
columnsCreated: 0,
columnsUpdated: 0,
columnsDeleted: 0,
},
enrichment: {
dataDictionary: 'completed',
tableDescriptions: 'completed',
columnDescriptions: 'completed',
embeddings: 'skipped',
deterministicRelationships: 'skipped',
llmRelationshipValidation: 'skipped',
statisticalValidation: 'skipped',
},
capabilityGaps: [],
warnings: [],
relationships: { accepted: 0, review: 0, rejected: 0, skipped: 0 },
enrichmentState: { resumedStages: [], completedStages: [], failedStages: [] },
createdAt: '2026-05-14T09:00:00.000Z',
},
null,
2,
),
'ktx',
'ktx@example.com',
'seed scan report',
);
const ports = createLocalProjectMcpContextPorts(project);
const results = await ports.discover?.search({ query: 'paid orders', connectionId: 'warehouse', limit: 10 });
expect(results).toEqual(
expect.arrayContaining([
expect.objectContaining({ kind: 'wiki', id: 'orders-playbook' }),
expect.objectContaining({ kind: 'sl_source', id: 'orders', connectionId: 'warehouse' }),
expect.objectContaining({ kind: 'table', id: 'public.orders', connectionId: 'warehouse' }),
]),
);
});
it('triggers canonical bundle ingest and reads status, report, and replay through MCP ports', async () => {
const project = await initKtxProject({ projectDir: tempDir });
project.config.connections.warehouse = {

View file

@ -18,6 +18,7 @@ import {
import { createLocalKtxEmbeddingProviderFromConfig, KtxIngestEmbeddingPortAdapter } from '../llm/index.js';
import type { KtxLocalProject } from '../project/index.js';
import {
createKtxEntityDetailsService,
getLocalScanReport,
getLocalScanStatus,
type KtxConnectionDriver,
@ -26,8 +27,11 @@ import {
type LocalScanMcpOptions,
runLocalScan,
} from '../scan/index.js';
import { createKtxDiscoverDataService } from '../search/index.js';
import type { SqlAnalysisDialect, SqlAnalysisPort } from '../sql-analysis/index.js';
import {
compileLocalSlQuery,
createKtxDictionarySearchService,
type LocalSlSourceSearchResult,
type LocalSlSourceSummary,
listLocalSlSources,
@ -44,6 +48,7 @@ import type {
KtxScanArtifactReadResponse,
KtxScanArtifactSummary,
KtxScanArtifactType,
KtxSqlExecutionResponse,
} from './types.js';
const LOCAL_AUTHOR = 'ktx';
@ -53,6 +58,7 @@ const SL_SHAPE_WARNING = 'Local stdio validation checks YAML shape only; Python
interface CreateLocalProjectMcpContextPortsOptions {
semanticLayerCompute?: KtxSemanticLayerComputePort;
queryExecutor?: KtxSqlQueryExecutorPort;
sqlAnalysis?: SqlAnalysisPort;
localIngest?: LocalIngestMcpOptions;
localScan?: LocalScanMcpOptions;
embeddingService?: KtxEmbeddingPort | null;
@ -77,6 +83,10 @@ function dialectForDriver(driver: string | undefined): string {
return map[normalized] ?? 'postgres';
}
function sqlAnalysisDialectForDriver(driver: string | undefined): SqlAnalysisDialect {
return dialectForDriver(driver) as SqlAnalysisDialect;
}
function assertSafePathToken(kind: string, value: string): string {
if (
value.trim().length === 0 ||
@ -378,6 +388,53 @@ function statusFromIngestReport(report: IngestReportSnapshot): KtxIngestStatusRe
};
}
async function executeValidatedReadOnlySql(
project: KtxLocalProject,
options: CreateLocalProjectMcpContextPortsOptions,
input: { connectionId: string; sql: string; maxRows: number },
): Promise<KtxSqlExecutionResponse> {
const connectionId = assertSafeConnectionId(input.connectionId);
const connection = project.config.connections[connectionId];
if (!connection) {
throw new Error(`Connection "${connectionId}" is not configured in ktx.yaml`);
}
if (!options.sqlAnalysis) {
throw new Error('sql_execution requires parser-backed SQL validation.');
}
const validation = await options.sqlAnalysis.validateReadOnly(input.sql, sqlAnalysisDialectForDriver(connection.driver));
if (!validation.ok) {
throw new Error(validation.error ?? 'SQL is not read-only.');
}
const createConnector = options.localScan?.createConnector;
if (!createConnector) {
throw new Error('sql_execution requires a local scan connector factory.');
}
let connector: KtxScanConnector | null = null;
try {
connector = await createConnector(connectionId);
if (!connector.capabilities.readOnlySql || !connector.executeReadOnly) {
throw new Error(`Connection "${connectionId}" does not support read-only SQL execution.`);
}
const result = await connector.executeReadOnly(
{
connectionId,
sql: input.sql,
maxRows: input.maxRows,
},
{ runId: 'mcp-sql-execution' },
);
return {
headers: result.headers,
...(result.headerTypes ? { headerTypes: result.headerTypes } : {}),
rows: result.rows,
rowCount: result.rowCount ?? result.rows.length,
};
} finally {
await cleanupConnector(connector);
}
}
export function createLocalProjectMcpContextPorts(
project: KtxLocalProject,
options: CreateLocalProjectMcpContextPortsOptions = {},
@ -575,8 +632,31 @@ export function createLocalProjectMcpContextPorts(
});
},
},
entityDetails: {
async read(input) {
return createKtxEntityDetailsService(project).read(input);
},
},
dictionarySearch: {
async search(input) {
return createKtxDictionarySearchService(project).search(input);
},
},
discover: {
async search(input) {
return createKtxDiscoverDataService(project, { userId: 'local', embeddingService }).search(input);
},
},
};
if (options.sqlAnalysis && options.localScan?.createConnector) {
ports.sqlExecution = {
async execute(input) {
return executeValidatedReadOnlySql(project, options, input);
},
};
}
if (options.localIngest) {
ports.ingest = {
async trigger(input) {

View file

@ -6,11 +6,16 @@ import { createLocalProjectMemoryCapture } from '../memory/index.js';
import { initKtxProject } from '../project/index.js';
import { createKtxMcpServer } from './server.js';
import type {
KtxDiscoverDataMcpPort,
KtxDictionarySearchMcpPort,
KtxEntityDetailsMcpPort,
KtxIngestMcpPort,
KtxKnowledgeMcpPort,
KtxMcpContextPorts,
KtxScanMcpPort,
KtxSemanticLayerMcpPort,
KtxSqlExecutionMcpPort,
KtxSqlExecutionResponse,
MemoryCapturePort,
} from './types.js';
@ -64,6 +69,242 @@ describe('createKtxMcpServer', () => {
});
});
it('registers parser-gated sql_execution when the host provides a SQL execution port', async () => {
const fake = makeFakeServer();
const response: KtxSqlExecutionResponse = {
headers: ['status', 'count'],
headerTypes: ['text', 'bigint'],
rows: [['paid', 42]],
rowCount: 1,
};
const sqlExecution: KtxSqlExecutionMcpPort = {
execute: vi.fn<KtxSqlExecutionMcpPort['execute']>().mockResolvedValue(response),
};
createKtxMcpServer({
server: fake.server,
userContext: { userId: 'local-user' },
contextTools: {
sqlExecution,
},
});
expect(fake.tools.map((tool) => tool.name)).toEqual(['sql_execution']);
await expect(
getTool(fake.tools, 'sql_execution').handler({
connectionId: 'warehouse',
sql: 'select status, count(*) from public.orders group by status',
maxRows: 50,
}),
).resolves.toEqual({
content: [
{
type: 'text',
text: JSON.stringify(
{
headers: ['status', 'count'],
headerTypes: ['text', 'bigint'],
rows: [['paid', 42]],
rowCount: 1,
},
null,
2,
),
},
],
structuredContent: {
headers: ['status', 'count'],
headerTypes: ['text', 'bigint'],
rows: [['paid', 42]],
rowCount: 1,
},
});
expect(sqlExecution.execute).toHaveBeenCalledWith({
connectionId: 'warehouse',
sql: 'select status, count(*) from public.orders group by status',
maxRows: 50,
});
});
it('registers entity_details when the host provides an entity-details port', async () => {
const fake = makeFakeServer();
const entityDetails: KtxEntityDetailsMcpPort = {
read: vi.fn<KtxEntityDetailsMcpPort['read']>().mockResolvedValue({
results: [
{
ok: true,
connectionId: 'warehouse',
tableRef: { catalog: null, db: 'public', name: 'orders' },
display: 'public.orders',
kind: 'table',
comment: 'Customer orders',
estimatedRows: 12,
columns: [
{
name: 'id',
nativeType: 'integer',
normalizedType: 'integer',
dimensionType: 'number',
nullable: false,
primaryKey: true,
comment: null,
},
],
foreignKeys: [],
snapshot: {
syncId: 'sync-1',
extractedAt: '2026-05-14T09:00:00.000Z',
scanRunId: 'scan-1',
},
},
],
}),
};
createKtxMcpServer({
server: fake.server,
userContext: { userId: 'local-user' },
contextTools: { entityDetails },
});
expect(fake.tools.map((tool) => tool.name)).toEqual(['entity_details']);
await expect(
getTool(fake.tools, 'entity_details').handler({
connectionId: 'warehouse',
entities: [{ table: 'public.orders', columns: ['id'] }],
}),
).resolves.toMatchObject({
structuredContent: {
results: [
{
ok: true,
connectionId: 'warehouse',
display: 'public.orders',
columns: [{ name: 'id' }],
},
],
},
});
expect(entityDetails.read).toHaveBeenCalledWith({
connectionId: 'warehouse',
entities: [{ table: 'public.orders', columns: ['id'] }],
});
});
it('registers dictionary_search when the host provides a dictionary-search port', async () => {
const fake = makeFakeServer();
const dictionarySearch: KtxDictionarySearchMcpPort = {
search: vi.fn<KtxDictionarySearchMcpPort['search']>().mockResolvedValue({
searched: [
{
connectionId: 'warehouse',
coverage: {
sampledRows: null,
valuesPerColumn: null,
profiledColumns: 1,
syncId: 'sync-1',
profiledAt: null,
},
status: 'ready',
},
],
results: [
{
value: 'paid',
matches: [
{
connectionId: 'warehouse',
sourceName: 'orders',
columnName: 'status',
matchedValue: 'paid',
cardinality: 3,
},
],
misses: [],
},
],
}),
};
createKtxMcpServer({
server: fake.server,
userContext: { userId: 'local-user' },
contextTools: { dictionarySearch },
});
expect(fake.tools.map((tool) => tool.name)).toEqual(['dictionary_search']);
await expect(
getTool(fake.tools, 'dictionary_search').handler({
connectionId: 'warehouse',
values: ['paid'],
}),
).resolves.toMatchObject({
structuredContent: {
searched: [{ connectionId: 'warehouse', status: 'ready' }],
results: [
{
value: 'paid',
matches: [{ connectionId: 'warehouse', sourceName: 'orders', columnName: 'status' }],
misses: [],
},
],
},
});
expect(dictionarySearch.search).toHaveBeenCalledWith({
connectionId: 'warehouse',
values: ['paid'],
});
});
it('registers discover_data when the host provides a discover port', async () => {
const fake = makeFakeServer();
const discover: KtxDiscoverDataMcpPort = {
search: vi.fn<KtxDiscoverDataMcpPort['search']>().mockResolvedValue([
{
kind: 'table',
id: 'public.orders',
score: 1,
summary: 'Orders table',
snippet: 'id, status',
matchedOn: 'name',
connectionId: 'warehouse',
tableRef: { catalog: null, db: 'public', name: 'orders' },
},
]),
};
createKtxMcpServer({
server: fake.server,
userContext: { userId: 'local-user' },
contextTools: { discover },
});
expect(fake.tools.map((tool) => tool.name)).toEqual(['discover_data']);
await expect(
getTool(fake.tools, 'discover_data').handler({
query: 'orders',
connectionId: 'warehouse',
kinds: ['table'],
limit: 5,
}),
).resolves.toMatchObject({
structuredContent: [
{
kind: 'table',
id: 'public.orders',
connectionId: 'warehouse',
tableRef: { catalog: null, db: 'public', name: 'orders' },
},
],
});
expect(discover.search).toHaveBeenCalledWith({
query: 'orders',
connectionId: 'warehouse',
kinds: ['table'],
limit: 5,
});
});
it('registers memory capture tools without host app dependencies', async () => {
const fake = makeFakeServer();
const capture: MemoryCapturePort = {

View file

@ -1,7 +1,11 @@
import type { IngestReportSnapshot, MemoryFlowReplayInput, TableUsageOutput } from '../ingest/index.js';
import type { MemoryCaptureService } from '../memory/index.js';
import type { KtxEntityDetailsInput, KtxEntityDetailsResponse } from '../scan/entity-details.js';
import type { KtxScanMode, KtxScanReport } from '../scan/index.js';
import type { KtxDiscoverDataInput, KtxDiscoverDataResponse } from '../search/index.js';
import type {
KtxDictionarySearchInput,
KtxDictionarySearchResponse,
SemanticLayerQueryInput,
SlDictionaryMatch,
SlSearchLaneSummary,
@ -312,10 +316,37 @@ export interface KtxScanMcpPort {
readArtifact?(input: { runId: string; path: string }): Promise<KtxScanArtifactReadResponse | null>;
}
export interface KtxEntityDetailsMcpPort {
read(input: KtxEntityDetailsInput): Promise<KtxEntityDetailsResponse>;
}
export interface KtxDictionarySearchMcpPort {
search(input: KtxDictionarySearchInput): Promise<KtxDictionarySearchResponse>;
}
export interface KtxDiscoverDataMcpPort {
search(input: KtxDiscoverDataInput): Promise<KtxDiscoverDataResponse>;
}
export interface KtxSqlExecutionResponse {
headers: string[];
headerTypes?: string[];
rows: unknown[][];
rowCount: number;
}
export interface KtxSqlExecutionMcpPort {
execute(input: { connectionId: string; sql: string; maxRows: number }): Promise<KtxSqlExecutionResponse>;
}
export interface KtxMcpContextPorts {
connections?: KtxConnectionsMcpPort;
knowledge?: KtxKnowledgeMcpPort;
semanticLayer?: KtxSemanticLayerMcpPort;
entityDetails?: KtxEntityDetailsMcpPort;
dictionarySearch?: KtxDictionarySearchMcpPort;
discover?: KtxDiscoverDataMcpPort;
sqlExecution?: KtxSqlExecutionMcpPort;
ingest?: KtxIngestMcpPort;
scan?: KtxScanMcpPort;
}

View file

@ -166,17 +166,17 @@ describe('memory runtime assets', () => {
}
});
it('ships only the KTX connectionName sql_execution call shape in writer guidance', async () => {
it('ships only the KTX connectionId sql_execution call shape in writer guidance', async () => {
const shared = await readFile(join(skillsDir, '_shared', 'identifier-verification.md'), 'utf-8');
const bodies = [{ name: '_shared/identifier-verification.md', body: shared }];
expect(shared).toContain('sql_execution({connectionName, sql: "SELECT DISTINCT');
expect(shared).toContain('sql_execution({connectionName, sql: "SELECT 1 FROM');
expect(shared).toContain('sql_execution({connectionId, sql: "SELECT DISTINCT');
expect(shared).toContain('sql_execution({connectionId, sql: "SELECT 1 FROM');
for (const skillName of verificationWriterSkills) {
const body = await readFile(join(skillsDir, skillName, 'SKILL.md'), 'utf-8');
bodies.push({ name: `${skillName}/SKILL.md`, body });
expect(body).toContain('sql_execution({connectionName');
expect(body).toContain('sql_execution({connectionId');
expect(body).not.toContain('sql_execution({ sql');
expect(body).not.toContain('session shape');
expect(body).not.toContain('connection is already pinned by the ingest session');
@ -186,8 +186,8 @@ describe('memory runtime assets', () => {
const calls = sqlExecutionCallBlocks(body);
expect(calls.length, `${name} should contain sql_execution guidance`).toBeGreaterThan(0);
expect(
calls.filter((call) => !call.includes('connectionName')),
`${name} has sql_execution calls without connectionName`,
calls.filter((call) => !call.includes('connectionId')),
`${name} has sql_execution calls without connectionId`,
).toEqual([]);
expect(body, `${name} has a connectionless multiline sql_execution call`).not.toMatch(
/sql_execution\(\{\s*sql\s*:/,

View file

@ -0,0 +1,291 @@
import { mkdtemp, rm } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { initKtxProject, type KtxLocalProject } from '../project/index.js';
import { createKtxEntityDetailsService } from './entity-details.js';
import type { KtxConnectionDriver, KtxScanReport, KtxSchemaTable } from './types.js';
describe('createKtxEntityDetailsService', () => {
let tempDir: string;
let project: KtxLocalProject;
beforeEach(async () => {
tempDir = await mkdtemp(join(tmpdir(), 'ktx-entity-details-service-'));
project = await initKtxProject({ projectDir: join(tempDir, 'project') });
});
afterEach(async () => {
await rm(tempDir, { recursive: true, force: true });
});
function scanReport(input: {
connectionId: string;
syncId: string;
runId: string;
driver?: KtxConnectionDriver;
createdAt?: string;
}): KtxScanReport {
const rawSourcesDir = `raw-sources/${input.connectionId}/live-database/${input.syncId}`;
return {
connectionId: input.connectionId,
driver: input.driver ?? 'postgres',
syncId: input.syncId,
runId: input.runId,
trigger: 'mcp',
mode: 'structural',
dryRun: false,
artifactPaths: {
rawSourcesDir,
reportPath: `${rawSourcesDir}/scan-report.json`,
manifestShards: [],
enrichmentArtifacts: [],
},
diffSummary: {
tablesAdded: 0,
tablesModified: 0,
tablesDeleted: 0,
tablesUnchanged: 1,
columnsAdded: 0,
columnsModified: 0,
columnsDeleted: 0,
},
manifestShardsWritten: 0,
structuralSyncStats: {
tablesCreated: 1,
tablesUpdated: 0,
tablesDeleted: 0,
columnsCreated: 0,
columnsUpdated: 0,
columnsDeleted: 0,
},
enrichment: {
dataDictionary: 'skipped',
tableDescriptions: 'skipped',
columnDescriptions: 'skipped',
embeddings: 'skipped',
deterministicRelationships: 'skipped',
llmRelationshipValidation: 'skipped',
statisticalValidation: 'skipped',
},
capabilityGaps: [],
warnings: [],
relationships: { accepted: 0, review: 0, rejected: 0, skipped: 0 },
enrichmentState: { resumedStages: [], completedStages: [], failedStages: [] },
createdAt: input.createdAt ?? '2026-05-14T09:00:00.000Z',
};
}
function ordersTable(input: { db?: string | null; estimatedRows?: number | null } = {}): KtxSchemaTable {
return {
catalog: null,
db: input.db ?? 'public',
name: 'orders',
kind: 'table',
comment: 'Customer orders',
estimatedRows: input.estimatedRows ?? 12,
columns: [
{
name: 'id',
nativeType: 'integer',
normalizedType: 'integer',
dimensionType: 'number',
nullable: false,
primaryKey: true,
comment: 'Order id',
},
{
name: 'status',
nativeType: 'text',
normalizedType: 'text',
dimensionType: 'string',
nullable: false,
primaryKey: false,
comment: 'Order status',
},
],
foreignKeys: [
{
fromColumn: 'customer_id',
toCatalog: null,
toDb: 'public',
toTable: 'customers',
toColumn: 'id',
constraintName: 'orders_customer_id_fkey',
},
],
};
}
async function seedScan(input: {
connectionId?: string;
syncId: string;
runId: string;
driver?: KtxConnectionDriver;
extractedAt?: string;
tables?: KtxSchemaTable[];
}): Promise<void> {
const connectionId = input.connectionId ?? 'warehouse';
const report = scanReport({
connectionId,
syncId: input.syncId,
runId: input.runId,
driver: input.driver,
createdAt: input.extractedAt,
});
const root = report.artifactPaths.rawSourcesDir;
await project.fileStore.writeFile(
`${root}/connection.json`,
JSON.stringify(
{
connectionId,
driver: report.driver,
extractedAt: input.extractedAt ?? report.createdAt,
scope: { schemas: ['public'] },
},
null,
2,
),
'ktx',
'ktx@example.com',
'seed connection',
);
for (const table of input.tables ?? [ordersTable()]) {
await project.fileStore.writeFile(
`${root}/tables/${table.db ?? 'default'}-${table.name}.json`,
JSON.stringify(table, null, 2),
'ktx',
'ktx@example.com',
`seed ${table.name}`,
);
}
await project.fileStore.writeFile(
`${root}/scan-report.json`,
JSON.stringify(report, null, 2),
'ktx',
'ktx@example.com',
'seed scan report',
);
}
it('returns the latest scan snapshot table details for a display string', async () => {
await seedScan({ syncId: 'sync-1', runId: 'scan-old', extractedAt: '2026-05-14T08:00:00.000Z' });
await seedScan({
syncId: 'sync-2',
runId: 'scan-new',
extractedAt: '2026-05-14T09:00:00.000Z',
tables: [ordersTable({ estimatedRows: 99 })],
});
const service = createKtxEntityDetailsService(project);
const result = await service.read({
connectionId: 'warehouse',
entities: [{ table: 'public.orders' }],
});
expect(result.results).toHaveLength(1);
expect(result.results[0]).toMatchObject({
ok: true,
connectionId: 'warehouse',
display: 'public.orders',
estimatedRows: 99,
snapshot: {
syncId: 'sync-2',
scanRunId: 'scan-new',
extractedAt: '2026-05-14T09:00:00.000Z',
},
columns: [
{ name: 'id', nativeType: 'integer', primaryKey: true },
{ name: 'status', nativeType: 'text', nullable: false },
],
});
});
it('filters requested columns while keeping full-table foreign keys', async () => {
await seedScan({ syncId: 'sync-1', runId: 'scan-1' });
const service = createKtxEntityDetailsService(project);
const result = await service.read({
connectionId: 'warehouse',
entities: [{ table: { catalog: null, db: 'public', name: 'orders' }, columns: ['status'] }],
});
expect(result.results[0]).toMatchObject({
ok: true,
columns: [{ name: 'status' }],
foreignKeys: [
{
fromColumn: 'customer_id',
toDb: 'public',
toTable: 'customers',
toColumn: 'id',
},
],
});
});
it('returns a structured missing-scan error', async () => {
const service = createKtxEntityDetailsService(project);
const result = await service.read({
connectionId: 'warehouse',
entities: [{ table: 'public.orders' }],
});
expect(result.results).toEqual([
{
ok: false,
connectionId: 'warehouse',
table: 'public.orders',
error: {
code: 'scan_missing',
message: 'No live-database scan found for connection "warehouse"; run `ktx ingest warehouse` or `ktx scan warehouse`.',
},
},
]);
});
it('reports ambiguous bare table names across schemas', async () => {
await seedScan({
syncId: 'sync-1',
runId: 'scan-1',
tables: [ordersTable({ db: 'public' }), ordersTable({ db: 'archive' })],
});
const service = createKtxEntityDetailsService(project);
const result = await service.read({
connectionId: 'warehouse',
entities: [{ table: 'orders' }],
});
expect(result.results[0]).toMatchObject({
ok: false,
error: {
code: 'ambiguous_table',
candidates: [
{ tableRef: { catalog: null, db: 'archive', name: 'orders' }, display: 'archive.orders' },
{ tableRef: { catalog: null, db: 'public', name: 'orders' }, display: 'public.orders' },
],
},
});
});
it('reports missing requested columns with available column candidates', async () => {
await seedScan({ syncId: 'sync-1', runId: 'scan-1' });
const service = createKtxEntityDetailsService(project);
const result = await service.read({
connectionId: 'warehouse',
entities: [{ table: 'public.orders', columns: ['status', 'plan_tier'] }],
});
expect(result.results[0]).toMatchObject({
ok: false,
error: {
code: 'column_not_found',
message: 'Column(s) not found on public.orders: plan_tier',
candidates: ['id', 'status'],
},
});
});
});

View file

@ -0,0 +1,315 @@
import type { KtxLocalProject } from '../project/index.js';
import { readLocalScanStructuralSnapshot } from './local-structural-artifacts.js';
import type {
KtxConnectionDriver,
KtxScanReport,
KtxSchemaColumn,
KtxSchemaSnapshot,
KtxSchemaTable,
KtxTableRef,
} from './types.js';
export type KtxEntityDetailsTableInput = string | KtxTableRef;
export interface KtxEntityDetailsInput {
connectionId: string;
entities: Array<{
table: KtxEntityDetailsTableInput;
columns?: string[];
}>;
}
export interface KtxEntityDetailsSnapshotInfo {
syncId: string;
extractedAt: string;
scanRunId: string | null;
}
export interface KtxEntityDetailsColumn {
name: string;
nativeType: string;
normalizedType: string;
dimensionType: KtxSchemaColumn['dimensionType'];
nullable: boolean;
primaryKey: boolean;
comment: string | null;
}
export interface KtxEntityDetailsRecord {
ok: true;
connectionId: string;
tableRef: KtxTableRef;
display: string;
kind: KtxSchemaTable['kind'];
comment: string | null;
estimatedRows: number | null;
columns: KtxEntityDetailsColumn[];
foreignKeys: KtxSchemaTable['foreignKeys'];
snapshot: KtxEntityDetailsSnapshotInfo;
}
export type KtxEntityDetailsErrorCode = 'scan_missing' | 'table_not_found' | 'ambiguous_table' | 'column_not_found';
export interface KtxEntityDetailsErrorResult {
ok: false;
connectionId: string;
table: KtxEntityDetailsTableInput;
snapshot?: KtxEntityDetailsSnapshotInfo;
error: {
code: KtxEntityDetailsErrorCode;
message: string;
candidates?: Array<{ tableRef: KtxTableRef; display: string }> | string[];
};
}
export interface KtxEntityDetailsResponse {
results: Array<KtxEntityDetailsRecord | KtxEntityDetailsErrorResult>;
}
interface LatestScan {
report: KtxScanReport;
snapshot: KtxSchemaSnapshot;
}
interface ResolveResult {
table: KtxSchemaTable | null;
error?: Omit<KtxEntityDetailsErrorResult['error'], 'message'> & { message: string };
}
function normalize(value: string | null | undefined): string {
return (value ?? '').toLowerCase();
}
function refsEqual(left: KtxTableRef, right: KtxTableRef): boolean {
return (
normalize(left.catalog) === normalize(right.catalog) &&
normalize(left.db) === normalize(right.db) &&
normalize(left.name) === normalize(right.name)
);
}
function cleanIdentifierPart(part: string): string {
return part.trim().replace(/^["'`\[]|["'`\]]$/g, '');
}
function splitDisplay(display: string): string[] {
return display
.trim()
.split('.')
.map(cleanIdentifierPart)
.filter(Boolean);
}
function displayForTable(driver: KtxConnectionDriver, table: KtxTableRef): string {
if (driver === 'sqlite') {
return table.name;
}
return [table.catalog, table.db, table.name].filter((part): part is string => Boolean(part)).join('.');
}
function tableRef(table: KtxSchemaTable): KtxTableRef {
return { catalog: table.catalog, db: table.db, name: table.name };
}
function candidateList(
driver: KtxConnectionDriver,
tables: KtxSchemaTable[],
): Array<{ tableRef: KtxTableRef; display: string }> {
return tables
.map((table) => ({
tableRef: tableRef(table),
display: displayForTable(driver, table),
}))
.sort((left, right) => left.display.localeCompare(right.display));
}
function parseDisplayRef(driver: KtxConnectionDriver, display: string): KtxTableRef | null {
const parts = splitDisplay(display);
if (driver === 'sqlite') {
return parts.length === 1 ? { catalog: null, db: null, name: parts[0]! } : null;
}
if (driver === 'bigquery' || driver === 'snowflake' || driver === 'sqlserver') {
return parts.length === 3 ? { catalog: parts[0]!, db: parts[1]!, name: parts[2]! } : null;
}
if (parts.length === 2) {
return { catalog: null, db: parts[0]!, name: parts[1]! };
}
if (parts.length === 3) {
return { catalog: parts[0]!, db: parts[1]!, name: parts[2]! };
}
return null;
}
function resolveTable(snapshot: KtxSchemaSnapshot, input: KtxEntityDetailsTableInput): ResolveResult {
if (typeof input !== 'string') {
const table = snapshot.tables.find((candidate) => refsEqual(candidate, input)) ?? null;
return table
? { table }
: {
table: null,
error: {
code: 'table_not_found',
message: `Table not found in latest scan: ${displayForTable(snapshot.driver, input)}`,
candidates: candidateList(snapshot.driver, snapshot.tables),
},
};
}
const parsed = parseDisplayRef(snapshot.driver, input);
if (parsed) {
const table = snapshot.tables.find((candidate) => refsEqual(candidate, parsed)) ?? null;
return table
? { table }
: {
table: null,
error: {
code: 'table_not_found',
message: `Table not found in latest scan: ${input}`,
candidates: candidateList(snapshot.driver, snapshot.tables),
},
};
}
const byName = snapshot.tables.filter((candidate) => normalize(candidate.name) === normalize(input));
if (byName.length === 1) {
return { table: byName[0]! };
}
if (byName.length > 1) {
return {
table: null,
error: {
code: 'ambiguous_table',
message: `Table name "${input}" is ambiguous across schemas/catalogs; pass a structured table ref.`,
candidates: candidateList(snapshot.driver, byName),
},
};
}
return {
table: null,
error: {
code: 'table_not_found',
message: `Table not found in latest scan: ${input}`,
candidates: candidateList(snapshot.driver, snapshot.tables),
},
};
}
function toColumn(column: KtxSchemaColumn): KtxEntityDetailsColumn {
return {
name: column.name,
nativeType: column.nativeType,
normalizedType: column.normalizedType,
dimensionType: column.dimensionType,
nullable: column.nullable,
primaryKey: column.primaryKey,
comment: column.comment,
};
}
function snapshotInfo(report: KtxScanReport, snapshot: KtxSchemaSnapshot): KtxEntityDetailsSnapshotInfo {
return {
syncId: report.syncId,
extractedAt: snapshot.extractedAt,
scanRunId: report.runId ?? null,
};
}
async function readJson<T>(project: KtxLocalProject, path: string): Promise<T> {
return JSON.parse((await project.fileStore.readFile(path)).content) as T;
}
async function latestScan(project: KtxLocalProject, connectionId: string): Promise<LatestScan | null> {
const root = `raw-sources/${connectionId}/live-database`;
let listed;
try {
listed = await project.fileStore.listFiles(root);
} catch {
return null;
}
const reportPath = listed.files.filter((path) => path.endsWith('/scan-report.json')).sort().at(-1);
if (!reportPath) {
return null;
}
const report = await readJson<KtxScanReport>(project, reportPath);
const rawSourcesDir = report.artifactPaths.rawSourcesDir ?? reportPath.slice(0, -'/scan-report.json'.length);
const snapshot = await readLocalScanStructuralSnapshot({
project,
connectionId,
driver: report.driver,
rawSourcesDir,
extractedAtFallback: report.createdAt,
});
return { report, snapshot };
}
export function createKtxEntityDetailsService(project: KtxLocalProject) {
return {
async read(input: KtxEntityDetailsInput): Promise<KtxEntityDetailsResponse> {
const scan = await latestScan(project, input.connectionId);
if (!scan) {
return {
results: input.entities.map((entity) => ({
ok: false,
connectionId: input.connectionId,
table: entity.table,
error: {
code: 'scan_missing',
message: `No live-database scan found for connection "${input.connectionId}"; run \`ktx ingest ${input.connectionId}\` or \`ktx scan ${input.connectionId}\`.`,
},
})),
};
}
const info = snapshotInfo(scan.report, scan.snapshot);
const results: KtxEntityDetailsResponse['results'] = [];
for (const entity of input.entities) {
const resolved = resolveTable(scan.snapshot, entity.table);
if (!resolved.table) {
results.push({
ok: false,
connectionId: input.connectionId,
table: entity.table,
snapshot: info,
error: resolved.error!,
});
continue;
}
const requested = new Set((entity.columns ?? []).map((column) => normalize(column)));
const columns = requested.size
? resolved.table.columns.filter((column) => requested.has(normalize(column.name)))
: resolved.table.columns;
if (requested.size && columns.length !== requested.size) {
const found = new Set(columns.map((column) => normalize(column.name)));
const missing = [...requested].filter((column) => !found.has(column));
results.push({
ok: false,
connectionId: input.connectionId,
table: entity.table,
snapshot: info,
error: {
code: 'column_not_found',
message: `Column(s) not found on ${displayForTable(scan.snapshot.driver, resolved.table)}: ${missing.join(', ')}`,
candidates: resolved.table.columns.map((column) => column.name),
},
});
continue;
}
results.push({
ok: true,
connectionId: input.connectionId,
tableRef: tableRef(resolved.table),
display: displayForTable(scan.snapshot.driver, resolved.table),
kind: resolved.table.kind,
comment: resolved.table.comment,
estimatedRows: resolved.table.estimatedRows,
columns: columns.map(toColumn),
foreignKeys: resolved.table.foreignKeys,
snapshot: info,
});
}
return { results };
},
};
}

View file

@ -60,6 +60,24 @@ export {
ktxScanErrorMessage,
skippedKtxScanEnrichmentSummary,
} from './enrichment-summary.js';
export type {
KtxEntityDetailsColumn,
KtxEntityDetailsErrorCode,
KtxEntityDetailsErrorResult,
KtxEntityDetailsInput,
KtxEntityDetailsRecord,
KtxEntityDetailsResponse,
KtxEntityDetailsSnapshotInfo,
KtxEntityDetailsTableInput,
} from './entity-details.js';
export { createKtxEntityDetailsService } from './entity-details.js';
export type {
DisplayTargetResolution,
RawSchemaHit,
TableDetail,
WarehouseCatalogServiceDeps,
} from './warehouse-catalog.js';
export { WarehouseCatalogService } from './warehouse-catalog.js';
export type {
KtxColumnSampleUpdate,
KtxDescriptionSource,

View file

@ -2,8 +2,8 @@ import { mkdtemp, rm } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { initKtxProject, type KtxLocalProject } from '../../../project/index.js';
import { WarehouseCatalogService } from './warehouse-catalog.service.js';
import { initKtxProject, type KtxLocalProject } from '../project/index.js';
import { WarehouseCatalogService } from './warehouse-catalog.js';
describe('WarehouseCatalogService', () => {
let tempDir: string;
@ -18,8 +18,8 @@ describe('WarehouseCatalogService', () => {
await rm(tempDir, { recursive: true, force: true });
});
async function seedLiveDatabaseScan(connectionName = 'warehouse', syncId = 'sync-2', driver = 'postgres') {
const root = `raw-sources/${connectionName}/live-database/${syncId}`;
async function seedLiveDatabaseScan(connectionId = 'warehouse', syncId = 'sync-2', driver = 'postgres') {
const root = `raw-sources/${connectionId}/live-database/${syncId}`;
const tableRef = {
catalog: driver === 'bigquery' ? 'analytics' : null,
db: driver === 'sqlite' ? null : 'public',
@ -27,7 +27,7 @@ describe('WarehouseCatalogService', () => {
};
await project.fileStore.writeFile(
`${root}/connection.json`,
JSON.stringify({ connectionId: connectionName, driver, extractedAt: '2026-05-12T00:00:00.000Z' }, null, 2),
JSON.stringify({ connectionId, driver, extractedAt: '2026-05-12T00:00:00.000Z' }, null, 2),
'ktx',
'ktx@example.com',
'seed connection',
@ -75,7 +75,7 @@ describe('WarehouseCatalogService', () => {
`${root}/enrichment/relationship-profile.json`,
JSON.stringify(
{
connectionId: connectionName,
connectionId,
driver,
sqlAvailable: true,
queryCount: 3,
@ -113,10 +113,10 @@ describe('WarehouseCatalogService', () => {
const catalog = new WarehouseCatalogService({ fileStore: project.fileStore });
await expect(catalog.getLatestSyncId('warehouse')).resolves.toBe('sync-2');
const detail = await catalog.getTable({ connectionName: 'warehouse', catalog: null, db: 'public', name: 'orders' });
const detail = await catalog.getTable({ connectionId: 'warehouse', catalog: null, db: 'public', name: 'orders' });
expect(detail).toMatchObject({
connectionName: 'warehouse',
connectionId: 'warehouse',
display: 'public.orders',
rowCount: 12,
columns: [
@ -124,11 +124,20 @@ describe('WarehouseCatalogService', () => {
{ name: 'status', nativeType: 'text', sampleValues: ['paid', 'refunded'], distinctCount: 2 },
],
});
expect(detail).not.toHaveProperty(['connection', 'Name'].join(''));
const hits = await catalog.searchByName('warehouse', 'orders', 5);
expect(hits[0]).toMatchObject({
kind: 'table',
connectionId: 'warehouse',
display: 'public.orders',
});
expect(hits[0]).not.toHaveProperty(['connection', 'Name'].join(''));
});
it('returns scanAvailable=false when no live-database scan exists', async () => {
const catalog = new WarehouseCatalogService({ fileStore: project.fileStore });
await expect(catalog.getTable({ connectionName: 'missing', catalog: null, db: 'public', name: 'orders' })).resolves.toBeNull();
await expect(catalog.getTable({ connectionId: 'missing', catalog: null, db: 'public', name: 'orders' })).resolves.toBeNull();
await expect(catalog.hasScan('missing')).resolves.toBe(false);
});

View file

@ -1,12 +1,12 @@
import { getDialectForDriver } from '../../../connections/index.js';
import type { KtxFileStorePort } from '../../../core/index.js';
import { getDialectForDriver } from '../connections/index.js';
import type { KtxFileStorePort } from '../core/index.js';
import type {
KtxConnectionDriver,
KtxSchemaColumn,
KtxSchemaForeignKey,
KtxSchemaTable,
KtxTableRef,
} from '../../../scan/types.js';
} from './types.js';
type CatalogDriver = KtxConnectionDriver | 'sqlite3';
@ -24,7 +24,7 @@ interface WarehouseColumnDetail extends KtxSchemaColumn {
}
export interface TableDetail {
connectionName: string;
connectionId: string;
catalog: string | null;
db: string | null;
name: string;
@ -40,14 +40,14 @@ export interface TableDetail {
export type RawSchemaHit =
| {
kind: 'table';
connectionName: string;
connectionId: string;
ref: KtxTableRef;
display: string;
matchedOn: 'name' | 'db' | 'comment' | 'description';
}
| {
kind: 'column';
connectionName: string;
connectionId: string;
ref: KtxTableRef & { column: string };
display: string;
matchedOn: 'name' | 'comment' | 'description';
@ -80,7 +80,7 @@ interface RelationshipProfileArtifact {
}
interface ConnectionCatalog {
connectionName: string;
connectionId: string;
syncId: string;
driver: CatalogDriver;
tables: KtxSchemaTable[];
@ -250,21 +250,21 @@ export class WarehouseCatalogService {
constructor(private readonly deps: WarehouseCatalogServiceDeps) {}
async hasScan(connectionName: string): Promise<boolean> {
return (await this.loadCatalog(connectionName)) !== null;
async hasScan(connectionId: string): Promise<boolean> {
return (await this.loadCatalog(connectionId)) !== null;
}
async getLatestSyncId(connectionName: string): Promise<string | null> {
return (await this.loadCatalog(connectionName))?.syncId ?? null;
async getLatestSyncId(connectionId: string): Promise<string | null> {
return (await this.loadCatalog(connectionId))?.syncId ?? null;
}
async listTables(connectionName: string): Promise<KtxTableRef[]> {
const catalog = await this.loadCatalog(connectionName);
async listTables(connectionId: string): Promise<KtxTableRef[]> {
const catalog = await this.loadCatalog(connectionId);
return catalog?.tables.map((table) => ({ catalog: table.catalog, db: table.db, name: table.name })) ?? [];
}
async getTable(ref: { connectionName: string } & KtxTableRef): Promise<TableDetail | null> {
const catalog = await this.loadCatalog(ref.connectionName);
async getTable(ref: { connectionId: string } & KtxTableRef): Promise<TableDetail | null> {
const catalog = await this.loadCatalog(ref.connectionId);
if (!catalog) {
return null;
}
@ -277,7 +277,7 @@ export class WarehouseCatalogService {
const profileColumns = catalog.profile?.columns ?? {};
return {
connectionName: ref.connectionName,
connectionId: ref.connectionId,
catalog: table.catalog,
db: table.db,
name: table.name,
@ -310,14 +310,14 @@ export class WarehouseCatalogService {
}
async resolveDisplay(
connectionName: string,
connectionId: string,
display: string,
): Promise<{
resolved: KtxTableRef | null;
candidates: KtxTableRef[];
dialect: string;
}> {
const catalog = await this.loadCatalog(connectionName);
const catalog = await this.loadCatalog(connectionId);
if (!catalog) {
return { resolved: null, candidates: [], dialect: 'unknown' };
}
@ -333,14 +333,14 @@ export class WarehouseCatalogService {
return { resolved: { catalog: table.catalog, db: table.db, name: table.name }, candidates: [], dialect };
}
async resolveDisplayTarget(connectionName: string, display: string): Promise<DisplayTargetResolution> {
const catalog = await this.loadCatalog(connectionName);
async resolveDisplayTarget(connectionId: string, display: string): Promise<DisplayTargetResolution> {
const catalog = await this.loadCatalog(connectionId);
if (!catalog) {
return { resolved: null, candidates: [], dialect: 'unknown' };
}
const dialect = getDialectForDriver(catalog.driver).type;
const tableResolution = await this.resolveDisplay(connectionName, display);
const tableResolution = await this.resolveDisplay(connectionId, display);
if (tableResolution.resolved) {
return tableResolution;
}
@ -367,8 +367,8 @@ export class WarehouseCatalogService {
};
}
async searchByName(connectionName: string, query: string, limit: number): Promise<RawSchemaHit[]> {
const catalog = await this.loadCatalog(connectionName);
async searchByName(connectionId: string, query: string, limit: number): Promise<RawSchemaHit[]> {
const catalog = await this.loadCatalog(connectionId);
if (!catalog) {
return [];
}
@ -378,7 +378,7 @@ export class WarehouseCatalogService {
if (tableMatch) {
hits.push({
kind: 'table',
connectionName,
connectionId,
ref: { catalog: table.catalog, db: table.db, name: table.name },
display: formatDisplay(catalog.driver, table),
matchedOn: tableMatch,
@ -391,7 +391,7 @@ export class WarehouseCatalogService {
}
hits.push({
kind: 'column',
connectionName,
connectionId,
ref: { catalog: table.catalog, db: table.db, name: table.name, column: column.name },
display: `${formatDisplay(catalog.driver, table)}.${column.name}`,
matchedOn: columnMatch,
@ -401,18 +401,18 @@ export class WarehouseCatalogService {
return hits.slice(0, Math.max(0, limit));
}
private loadCatalog(connectionName: string): Promise<ConnectionCatalog | null> {
const existing = this.catalogs.get(connectionName);
private loadCatalog(connectionId: string): Promise<ConnectionCatalog | null> {
const existing = this.catalogs.get(connectionId);
if (existing) {
return existing;
}
const pending = this.readCatalog(connectionName);
this.catalogs.set(connectionName, pending);
const pending = this.readCatalog(connectionId);
this.catalogs.set(connectionId, pending);
return pending;
}
private async readCatalog(connectionName: string): Promise<ConnectionCatalog | null> {
const root = `raw-sources/${connectionName}/live-database`;
private async readCatalog(connectionId: string): Promise<ConnectionCatalog | null> {
const root = `raw-sources/${connectionId}/live-database`;
const listed = await this.deps.fileStore.listFiles(root);
const connectionFiles = listed.files.filter((file) => file.endsWith('/connection.json')).sort();
const latestConnectionPath = connectionFiles.at(-1);
@ -438,7 +438,7 @@ export class WarehouseCatalogService {
}
return {
connectionName,
connectionId,
syncId,
driver: connection.driver ?? profile?.driver ?? 'postgres',
tables,

View file

@ -0,0 +1,264 @@
import { mkdtemp, rm } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { initKtxProject, type KtxLocalProject } from '../project/index.js';
import { writeLocalKnowledgePage } from '../wiki/local-knowledge.js';
import { createKtxDiscoverDataService } from './discover.js';
describe('createKtxDiscoverDataService', () => {
let tempDir: string;
let project: KtxLocalProject;
beforeEach(async () => {
tempDir = await mkdtemp(join(tmpdir(), 'ktx-discover-data-'));
project = await initKtxProject({ projectDir: join(tempDir, 'project') });
project.config.connections.warehouse = { driver: 'postgres', url: 'env:DATABASE_URL' };
project.config.connections.billing = { driver: 'postgres', url: 'env:BILLING_DATABASE_URL' };
});
afterEach(async () => {
await rm(tempDir, { recursive: true, force: true });
});
async function seedWiki(): Promise<void> {
await writeLocalKnowledgePage(project, {
key: 'orders-playbook',
scope: 'GLOBAL',
summary: 'Paid order operations',
content: 'Use paid orders and order_count to inspect monthly customer activity for Acme Corp.',
tags: ['orders'],
});
}
async function seedSl(): Promise<void> {
await project.fileStore.writeFile(
'semantic-layer/warehouse/orders.yaml',
[
'name: orders',
'descriptions:',
' user: Paid order facts',
'table: public.orders',
'grain: [id]',
'columns:',
' - name: status',
' type: string',
' descriptions:',
' user: Payment status for the order',
' - name: ordered_at',
' type: time',
'measures:',
' - name: order_count',
' expr: count(*)',
' description: Number of paid orders',
'',
].join('\n'),
'ktx',
'ktx@example.com',
'seed sl source',
);
}
async function seedScan(input: {
connectionId?: string;
syncId: string;
tableName?: string;
comment?: string;
sampleValues?: string[];
}): Promise<void> {
const connectionId = input.connectionId ?? 'warehouse';
const root = `raw-sources/${connectionId}/live-database/${input.syncId}`;
const tableName = input.tableName ?? 'orders';
await project.fileStore.writeFile(
`${root}/connection.json`,
JSON.stringify(
{
connectionId,
driver: 'postgres',
extractedAt: `2026-05-14T09:00:00.000Z`,
scope: { schemas: ['public'] },
},
null,
2,
),
'ktx',
'ktx@example.com',
'seed scan connection',
);
await project.fileStore.writeFile(
`${root}/tables/public-${tableName}.json`,
JSON.stringify(
{
catalog: null,
db: 'public',
name: tableName,
kind: 'table',
comment: input.comment ?? 'Orders table from warehouse',
estimatedRows: 123,
descriptions: { db: input.comment ?? 'Orders table from warehouse' },
columns: [
{
name: 'id',
nativeType: 'integer',
normalizedType: 'integer',
dimensionType: 'number',
nullable: false,
primaryKey: true,
comment: 'Order id',
},
{
name: 'status',
nativeType: 'text',
normalizedType: 'text',
dimensionType: 'string',
nullable: false,
primaryKey: false,
comment: 'Order status',
sampleValues: input.sampleValues ?? ['paid', 'pending'],
},
],
foreignKeys: [],
},
null,
2,
),
'ktx',
'ktx@example.com',
'seed table',
);
await project.fileStore.writeFile(
`${root}/scan-report.json`,
JSON.stringify(
{
connectionId,
driver: 'postgres',
syncId: input.syncId,
runId: `scan-${input.syncId}`,
trigger: 'mcp',
mode: 'enriched',
dryRun: false,
artifactPaths: {
rawSourcesDir: root,
reportPath: `${root}/scan-report.json`,
manifestShards: [],
enrichmentArtifacts: [],
},
diffSummary: {
tablesAdded: 1,
tablesModified: 0,
tablesDeleted: 0,
tablesUnchanged: 0,
columnsAdded: 0,
columnsModified: 0,
columnsDeleted: 0,
},
manifestShardsWritten: 0,
structuralSyncStats: {
tablesCreated: 0,
tablesUpdated: 0,
tablesDeleted: 0,
columnsCreated: 0,
columnsUpdated: 0,
columnsDeleted: 0,
},
enrichment: {
dataDictionary: 'completed',
tableDescriptions: 'completed',
columnDescriptions: 'completed',
embeddings: 'skipped',
deterministicRelationships: 'skipped',
llmRelationshipValidation: 'skipped',
statisticalValidation: 'skipped',
},
capabilityGaps: [],
warnings: [],
relationships: { accepted: 0, review: 0, rejected: 0, skipped: 0 },
enrichmentState: { resumedStages: [], completedStages: [], failedStages: [] },
createdAt: '2026-05-14T09:00:00.000Z',
},
null,
2,
),
'ktx',
'ktx@example.com',
'seed scan report',
);
}
it('returns unified ranked refs across wiki, semantic-layer, and raw schema', async () => {
await seedWiki();
await seedSl();
await seedScan({ syncId: 'sync-1', sampleValues: ['paid', 'refunded'] });
const service = createKtxDiscoverDataService(project, { userId: 'local-user' });
const results = await service.search({ query: 'paid orders', connectionId: 'warehouse', limit: 10 });
expect(results.map((result) => result.kind)).toEqual(
expect.arrayContaining(['wiki', 'sl_source', 'sl_measure', 'sl_dimension', 'table', 'column']),
);
expect(results.every((result) => result.score >= 0 && result.score <= 1)).toBe(true);
expect(results.every((result) => result.snippet === null || result.snippet.length <= 200)).toBe(true);
expect(results).toContainEqual(
expect.objectContaining({
kind: 'table',
id: 'public.orders',
connectionId: 'warehouse',
tableRef: { catalog: null, db: 'public', name: 'orders' },
matchedOn: expect.stringMatching(/name|description|comment|display/),
}),
);
expect(results).toContainEqual(
expect.objectContaining({
kind: 'column',
id: 'public.orders.status',
connectionId: 'warehouse',
columnName: 'status',
matchedOn: expect.stringMatching(/name|comment|description|sample_value/),
}),
);
expect(results).toContainEqual(
expect.objectContaining({
kind: 'sl_measure',
id: 'orders.order_count',
connectionId: 'warehouse',
summary: 'Number of paid orders',
snippet: 'count(*)',
matchedOn: expect.stringMatching(/name|description|expr/),
}),
);
});
it('honors kind filters and connection scope', async () => {
await seedWiki();
await seedSl();
await seedScan({ syncId: 'sync-1', connectionId: 'warehouse', tableName: 'orders' });
await seedScan({ syncId: 'sync-2', connectionId: 'billing', tableName: 'invoices', comment: 'Billing invoices' });
const service = createKtxDiscoverDataService(project);
const results = await service.search({
query: 'orders',
connectionId: 'warehouse',
kinds: ['table', 'column'],
limit: 10,
});
expect(results.every((result) => result.kind === 'table' || result.kind === 'column')).toBe(true);
expect(results.every((result) => result.connectionId === 'warehouse')).toBe(true);
expect(results.some((result) => result.id.includes('invoices'))).toBe(false);
expect(results.some((result) => result.kind === 'wiki')).toBe(false);
});
it('re-reads the latest scan artifacts on each call', async () => {
await seedScan({ syncId: 'sync-1', tableName: 'orders', comment: 'Old orders table' });
const service = createKtxDiscoverDataService(project);
await expect(
service.search({ query: 'orders', connectionId: 'warehouse', kinds: ['table'], limit: 10 }),
).resolves.toEqual(expect.arrayContaining([expect.objectContaining({ id: 'public.orders' })]));
await seedScan({ syncId: 'sync-2', tableName: 'invoices', comment: 'Invoice facts' });
const fresh = await service.search({ query: 'invoice', connectionId: 'warehouse', kinds: ['table'], limit: 10 });
expect(fresh).toEqual(expect.arrayContaining([expect.objectContaining({ id: 'public.invoices' })]));
expect(fresh.some((result) => result.id === 'public.orders')).toBe(false);
});
});

View file

@ -0,0 +1,466 @@
import type { KtxEmbeddingPort } from '../core/index.js';
import type { KtxLocalProject } from '../project/index.js';
import type { KtxScanReport, KtxSchemaColumn, KtxSchemaTable, KtxTableRef } from '../scan/index.js';
import { DEFAULT_PRIORITY, loadLocalSlSourceRecords, resolveDescription } from '../sl/index.js';
import { readLocalKnowledgePage, searchLocalKnowledgePages } from '../wiki/local-knowledge.js';
import { HybridSearchCore, type FusedSearchCandidate, type SearchCandidateGenerator } from './index.js';
export type KtxDiscoverDataKind = 'wiki' | 'sl_source' | 'sl_measure' | 'sl_dimension' | 'table' | 'column';
export type KtxDiscoverDataMatchedOn = 'name' | 'display' | 'description' | 'comment' | 'expr' | 'sample_value' | 'body';
export interface KtxDiscoverDataInput {
query: string;
connectionId?: string;
kinds?: KtxDiscoverDataKind[];
limit?: number;
}
export interface KtxDiscoverDataRef {
kind: KtxDiscoverDataKind;
id: string;
score: number;
summary: string | null;
snippet: string | null;
matchedOn: KtxDiscoverDataMatchedOn;
connectionId?: string;
tableRef?: KtxTableRef;
columnName?: string;
}
export type KtxDiscoverDataResponse = KtxDiscoverDataRef[];
export interface KtxDiscoverDataServiceOptions {
userId?: string;
embeddingService?: KtxEmbeddingPort | null;
}
interface CandidateRecord {
ref: Omit<KtxDiscoverDataRef, 'score'>;
rankScore: number;
}
type RawTable = KtxSchemaTable & {
descriptions?: Record<string, string>;
columns: Array<KtxSchemaColumn & { descriptions?: Record<string, string>; sampleValues?: unknown[] }>;
};
interface LatestScan {
report: KtxScanReport;
rawSourcesDir: string;
tables: RawTable[];
}
const ALL_KINDS: KtxDiscoverDataKind[] = ['wiki', 'sl_source', 'sl_measure', 'sl_dimension', 'table', 'column'];
function normalize(value: string | null | undefined): string {
return (value ?? '').toLowerCase();
}
function queryTerms(query: string): string[] {
return query
.toLowerCase()
.split(/[^a-z0-9_]+/u)
.map((term) => term.trim())
.filter(Boolean);
}
function hasKind(kinds: ReadonlySet<KtxDiscoverDataKind>, kind: KtxDiscoverDataKind): boolean {
return kinds.has(kind);
}
function cap200(value: string | null | undefined): string | null {
if (!value) {
return null;
}
const compact = value.replace(/\s+/g, ' ').trim();
return compact.length > 200 ? compact.slice(0, 200) : compact;
}
function snippetAround(text: string | null | undefined, terms: readonly string[]): string | null {
if (!text) {
return null;
}
const lower = text.toLowerCase();
const index =
terms
.map((term) => lower.indexOf(term))
.filter((position) => position >= 0)
.sort((a, b) => a - b)[0] ?? 0;
return cap200(text.slice(Math.max(0, index - 60), index + 140));
}
function textScore(value: string | null | undefined, terms: readonly string[]): number {
const haystack = normalize(value);
if (!haystack || terms.length === 0) {
return 0;
}
const matched = terms.filter((term) => haystack.includes(term)).length;
return matched / terms.length;
}
function bestField(
fields: Array<{ matchedOn: KtxDiscoverDataMatchedOn; text: string | null | undefined; weight: number }>,
terms: readonly string[],
): { matchedOn: KtxDiscoverDataMatchedOn; score: number; text: string | null } | null {
const scored = fields
.map((field) => ({
matchedOn: field.matchedOn,
score: textScore(field.text, terms) * field.weight,
text: field.text ?? null,
}))
.filter((field) => field.score > 0)
.sort((left, right) => right.score - left.score || left.matchedOn.localeCompare(right.matchedOn));
return scored[0] ?? null;
}
function displayForTable(table: KtxTableRef): string {
return [table.catalog, table.db, table.name].filter((part): part is string => Boolean(part)).join('.');
}
function tableRef(table: KtxSchemaTable): KtxTableRef {
return { catalog: table.catalog, db: table.db, name: table.name };
}
async function readJson<T>(project: KtxLocalProject, path: string): Promise<T> {
return JSON.parse((await project.fileStore.readFile(path)).content) as T;
}
async function latestScan(project: KtxLocalProject, connectionId: string): Promise<LatestScan | null> {
const root = `raw-sources/${connectionId}/live-database`;
let files: string[];
try {
files = (await project.fileStore.listFiles(root)).files;
} catch {
return null;
}
const reportPath = files
.filter((path) => path.endsWith('/scan-report.json'))
.sort()
.at(-1);
if (!reportPath) {
return null;
}
const report = await readJson<KtxScanReport>(project, reportPath);
const rawSourcesDir = report.artifactPaths.rawSourcesDir ?? reportPath.slice(0, -'/scan-report.json'.length);
const listedTables = await project.fileStore.listFiles(`${rawSourcesDir}/tables`);
const tables: RawTable[] = [];
for (const path of listedTables.files.filter((file) => file.endsWith('.json')).sort()) {
tables.push(await readJson<RawTable>(project, path));
}
return { report, rawSourcesDir, tables };
}
function configuredConnectionIds(project: KtxLocalProject, connectionId?: string): string[] {
return connectionId ? [connectionId] : Object.keys(project.config.connections).sort();
}
async function wikiCandidates(
project: KtxLocalProject,
input: KtxDiscoverDataInput,
options: KtxDiscoverDataServiceOptions,
terms: readonly string[],
): Promise<CandidateRecord[]> {
const searchResults = await searchLocalKnowledgePages(project, {
query: input.query,
userId: options.userId,
embeddingService: options.embeddingService ?? null,
limit: Math.max(input.limit ?? 15, 25),
});
const records: CandidateRecord[] = [];
for (const result of searchResults) {
const page = await readLocalKnowledgePage(project, { key: result.key, userId: options.userId });
const content = page?.content ?? '';
const matched = bestField(
[
{ matchedOn: 'name', text: result.key, weight: 1.1 },
{ matchedOn: 'description', text: result.summary, weight: 1 },
{ matchedOn: 'body', text: content, weight: 0.8 },
],
terms,
);
records.push({
rankScore: result.score + (matched?.score ?? 0),
ref: {
kind: 'wiki',
id: result.key,
summary: result.summary || null,
snippet: snippetAround(content, terms),
matchedOn: matched?.matchedOn ?? 'body',
},
});
}
return records.sort((left, right) => right.rankScore - left.rankScore || left.ref.id.localeCompare(right.ref.id));
}
async function slCandidates(
project: KtxLocalProject,
input: KtxDiscoverDataInput,
kinds: ReadonlySet<KtxDiscoverDataKind>,
terms: readonly string[],
): Promise<CandidateRecord[]> {
const records: CandidateRecord[] = [];
for (const connectionId of configuredConnectionIds(project, input.connectionId)) {
const sources = await loadLocalSlSourceRecords(project, { connectionId }).catch(() => []);
for (const sourceRecord of sources) {
const source = sourceRecord.source;
if (hasKind(kinds, 'sl_source')) {
const description = resolveDescription(source.descriptions, { priority: DEFAULT_PRIORITY });
const matched = bestField(
[
{ matchedOn: 'name', text: source.name, weight: 1.2 },
{ matchedOn: 'description', text: description, weight: 1 },
{ matchedOn: 'display', text: source.table ?? source.sql ?? null, weight: 0.8 },
],
terms,
);
if (matched) {
records.push({
rankScore: matched.score,
ref: {
kind: 'sl_source',
id: source.name,
connectionId,
summary: description,
snippet:
matched.matchedOn === 'description'
? snippetAround(description, terms)
: cap200(
`${source.name}: ${[
...source.measures.map((measure) => measure.name),
...source.columns.map((column) => column.name),
]
.slice(0, 3)
.join(', ')}`,
),
matchedOn: matched.matchedOn,
},
});
}
}
if (hasKind(kinds, 'sl_measure')) {
for (const measure of source.measures) {
const matched = bestField(
[
{ matchedOn: 'name', text: measure.name, weight: 1.2 },
{ matchedOn: 'description', text: measure.description, weight: 1 },
{ matchedOn: 'expr', text: measure.expr, weight: 0.9 },
],
terms,
);
if (matched) {
records.push({
rankScore: matched.score,
ref: {
kind: 'sl_measure',
id: `${source.name}.${measure.name}`,
connectionId,
summary: measure.description ?? null,
snippet: cap200(measure.expr),
matchedOn: matched.matchedOn,
},
});
}
}
}
if (hasKind(kinds, 'sl_dimension')) {
for (const column of source.columns) {
const description = resolveDescription(column.descriptions, { priority: DEFAULT_PRIORITY });
const matched = bestField(
[
{ matchedOn: 'name', text: column.name, weight: 1.2 },
{ matchedOn: 'display', text: `${source.name}.${column.name}`, weight: 1.1 },
{ matchedOn: 'description', text: description, weight: 1 },
{ matchedOn: 'expr', text: column.expr, weight: 0.9 },
],
terms,
);
if (matched) {
records.push({
rankScore: matched.score,
ref: {
kind: 'sl_dimension',
id: `${source.name}.${column.name}`,
connectionId,
summary: description,
snippet: cap200(`${column.name} (${column.type})`),
matchedOn: matched.matchedOn,
},
});
}
}
}
}
}
return records.sort((left, right) => right.rankScore - left.rankScore || left.ref.id.localeCompare(right.ref.id));
}
async function rawCandidates(
project: KtxLocalProject,
input: KtxDiscoverDataInput,
kinds: ReadonlySet<KtxDiscoverDataKind>,
terms: readonly string[],
): Promise<CandidateRecord[]> {
const records: CandidateRecord[] = [];
for (const connectionId of configuredConnectionIds(project, input.connectionId)) {
const scan = await latestScan(project, connectionId);
if (!scan) {
continue;
}
for (const table of scan.tables) {
const ref = tableRef(table);
const display = displayForTable(ref);
const tableDescription = resolveDescription(table.descriptions, { priority: DEFAULT_PRIORITY }) ?? table.comment;
if (hasKind(kinds, 'table')) {
const matched = bestField(
[
{ matchedOn: 'name', text: table.name, weight: 1.2 },
{ matchedOn: 'display', text: display, weight: 1.1 },
{ matchedOn: 'description', text: tableDescription, weight: 1 },
{ matchedOn: 'comment', text: table.comment, weight: 1 },
],
terms,
);
if (matched) {
records.push({
rankScore: matched.score,
ref: {
kind: 'table',
id: display,
connectionId,
tableRef: ref,
summary: tableDescription,
snippet:
matched.matchedOn === 'description' || matched.matchedOn === 'comment'
? snippetAround(matched.text, terms)
: cap200(table.columns.slice(0, 5).map((column) => column.name).join(', ')),
matchedOn: matched.matchedOn,
},
});
}
}
if (hasKind(kinds, 'column')) {
for (const column of table.columns) {
const columnDescription = resolveDescription(column.descriptions, { priority: DEFAULT_PRIORITY }) ?? column.comment;
const samples = (column.sampleValues ?? []).map((value) => String(value)).slice(0, 5);
const matched = bestField(
[
{ matchedOn: 'name', text: column.name, weight: 1.2 },
{ matchedOn: 'display', text: `${display}.${column.name}`, weight: 1.1 },
{ matchedOn: 'description', text: columnDescription, weight: 1 },
{ matchedOn: 'comment', text: column.comment, weight: 1 },
{ matchedOn: 'sample_value', text: samples.join(' '), weight: 1.3 },
],
terms,
);
if (matched) {
records.push({
rankScore: matched.score,
ref: {
kind: 'column',
id: `${display}.${column.name}`,
connectionId,
tableRef: ref,
columnName: column.name,
summary: columnDescription,
snippet:
matched.matchedOn === 'sample_value'
? cap200(`${column.nativeType} - samples: ${samples.join(', ')}`)
: matched.matchedOn === 'description' || matched.matchedOn === 'comment'
? snippetAround(matched.text, terms)
: cap200(column.nativeType),
matchedOn: matched.matchedOn,
},
});
}
}
}
}
}
return records.sort((left, right) => right.rankScore - left.rankScore || left.ref.id.localeCompare(right.ref.id));
}
function generator(
name: string,
candidates: CandidateRecord[],
refsByKey: Map<string, Omit<KtxDiscoverDataRef, 'score'>>,
): SearchCandidateGenerator {
candidates.forEach((candidate) =>
refsByKey.set(`${candidate.ref.kind}:${candidate.ref.connectionId ?? ''}:${candidate.ref.id}`, candidate.ref),
);
return {
lane: name,
weight: 1,
async generate() {
return {
candidates: candidates.map((candidate, index) => ({
id: `${candidate.ref.kind}:${candidate.ref.connectionId ?? ''}:${candidate.ref.id}`,
rank: index + 1,
rawScore: candidate.rankScore,
})),
};
},
};
}
function hydrate(
fused: FusedSearchCandidate[],
refsByKey: Map<string, Omit<KtxDiscoverDataRef, 'score'>>,
): KtxDiscoverDataRef[] {
const maxScore = Math.max(...fused.map((candidate) => candidate.score), 0);
return fused
.map((candidate) => {
const ref = refsByKey.get(candidate.id);
if (!ref) {
return null;
}
return {
...ref,
score: maxScore > 0 ? Number((candidate.score / maxScore).toFixed(6)) : 0,
};
})
.filter((result): result is KtxDiscoverDataRef => result !== null);
}
export function createKtxDiscoverDataService(
project: KtxLocalProject,
options: KtxDiscoverDataServiceOptions = {},
): { search(input: KtxDiscoverDataInput): Promise<KtxDiscoverDataResponse> } {
return {
async search(input) {
const limit = Math.max(1, Math.min(input.limit ?? 15, 50));
const query = input.query.trim();
if (!query) {
return [];
}
const kinds = new Set(input.kinds ?? ALL_KINDS);
const terms = queryTerms(query);
const refsByKey = new Map<string, Omit<KtxDiscoverDataRef, 'score'>>();
const generators: SearchCandidateGenerator[] = [];
if (hasKind(kinds, 'wiki')) {
generators.push(generator('wiki', await wikiCandidates(project, { ...input, limit }, options, terms), refsByKey));
}
if (hasKind(kinds, 'sl_source') || hasKind(kinds, 'sl_measure') || hasKind(kinds, 'sl_dimension')) {
generators.push(generator('semantic_layer', await slCandidates(project, { ...input, limit }, kinds, terms), refsByKey));
}
if (hasKind(kinds, 'table') || hasKind(kinds, 'column')) {
generators.push(generator('raw_schema', await rawCandidates(project, { ...input, limit }, kinds, terms), refsByKey));
}
if (generators.length === 0) {
return [];
}
const result = await new HybridSearchCore().search({
queryText: query,
limit,
generators,
laneWeights: { wiki: 1, semantic_layer: 1, raw_schema: 1 },
});
return hydrate(result.results, refsByKey);
},
};
}

View file

@ -10,6 +10,15 @@ export {
assertSearchBackendCapabilities,
assertSearchBackendConformanceCase,
} from './backend-conformance.js';
export { createKtxDiscoverDataService } from './discover.js';
export type {
KtxDiscoverDataInput,
KtxDiscoverDataKind,
KtxDiscoverDataMatchedOn,
KtxDiscoverDataRef,
KtxDiscoverDataResponse,
KtxDiscoverDataServiceOptions,
} from './discover.js';
export { HybridSearchCore } from './hybrid-search-core.js';
export { defaultLaneCandidatePoolLimit, normalizeSearchQuery } from './query.js';
export {

View file

@ -0,0 +1,228 @@
import { mkdtemp, rm } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { initKtxProject, type KtxLocalProject } from '../project/index.js';
import { createKtxDictionarySearchService } from './dictionary-search.js';
describe('createKtxDictionarySearchService', () => {
let tempDir: string;
let project: KtxLocalProject;
beforeEach(async () => {
tempDir = await mkdtemp(join(tmpdir(), 'ktx-dictionary-search-'));
project = await initKtxProject({ projectDir: join(tempDir, 'project') });
project.config.connections.warehouse = { driver: 'postgres', url: 'env:DATABASE_URL' };
project.config.connections.billing = { driver: 'postgres', url: 'env:BILLING_DATABASE_URL' };
});
afterEach(async () => {
await rm(tempDir, { recursive: true, force: true });
});
async function seedProfile(input: {
connectionId: string;
syncId: string;
columns: Record<string, unknown>;
}): Promise<void> {
await project.fileStore.writeFile(
`raw-sources/${input.connectionId}/live-database/${input.syncId}/enrichment/relationship-profile.json`,
`${JSON.stringify(
{
connectionId: input.connectionId,
driver: 'postgres',
sqlAvailable: true,
queryCount: 4,
tables: [],
columns: input.columns,
warnings: [],
},
null,
2,
)}\n`,
'ktx',
'ktx@example.com',
'Seed relationship profile',
);
}
it('returns matches and non-authoritative misses across configured connections', async () => {
await seedProfile({
connectionId: 'warehouse',
syncId: 'sync-1',
columns: {
'orders.status': {
table: { catalog: null, db: 'public', name: 'orders' },
column: 'status',
nativeType: 'text',
normalizedType: 'string',
distinctCount: 3,
sampleValues: ['paid', 'refunded', 'pending'],
},
},
});
await seedProfile({
connectionId: 'billing',
syncId: 'sync-2',
columns: {
'customers.name': {
table: { catalog: null, db: 'public', name: 'customers' },
column: 'name',
nativeType: 'text',
normalizedType: 'string',
distinctCount: 4,
sampleValues: ['Acme Corp', 'Globex'],
},
},
});
const service = createKtxDictionarySearchService(project);
await expect(service.search({ values: ['PAID', 'missing'] })).resolves.toEqual({
searched: [
{
connectionId: 'billing',
coverage: {
sampledRows: null,
valuesPerColumn: null,
profiledColumns: 1,
syncId: 'sync-2',
profiledAt: null,
},
status: 'ready',
},
{
connectionId: 'warehouse',
coverage: {
sampledRows: null,
valuesPerColumn: null,
profiledColumns: 1,
syncId: 'sync-1',
profiledAt: null,
},
status: 'ready',
},
],
results: [
{
value: 'PAID',
matches: [
{
connectionId: 'warehouse',
sourceName: 'orders',
columnName: 'status',
matchedValue: 'paid',
cardinality: 3,
},
],
misses: [{ connectionId: 'billing', reason: 'value_not_in_sample' }],
},
{
value: 'missing',
matches: [],
misses: [
{ connectionId: 'billing', reason: 'value_not_in_sample' },
{ connectionId: 'warehouse', reason: 'value_not_in_sample' },
],
},
],
});
});
it('distinguishes missing profile artifacts from profiles with no candidate columns', async () => {
await seedProfile({
connectionId: 'billing',
syncId: 'sync-empty',
columns: {
'events.id': {
table: { catalog: null, db: 'public', name: 'events' },
column: 'id',
nativeType: 'integer',
normalizedType: 'integer',
distinctCount: 100,
sampleValues: [1, 2, 3],
},
},
});
const service = createKtxDictionarySearchService(project);
await expect(service.search({ values: ['Acme'] })).resolves.toEqual({
searched: [
{
connectionId: 'billing',
coverage: {
sampledRows: null,
valuesPerColumn: null,
profiledColumns: 0,
syncId: 'sync-empty',
profiledAt: null,
},
status: 'no_candidate_columns',
},
{
connectionId: 'warehouse',
coverage: {
sampledRows: null,
valuesPerColumn: null,
profiledColumns: 0,
syncId: null,
profiledAt: null,
},
status: 'no_profile_artifact',
},
],
results: [
{
value: 'Acme',
matches: [],
misses: [
{ connectionId: 'billing', reason: 'no_candidate_columns' },
{ connectionId: 'warehouse', reason: 'no_profile_artifact' },
],
},
],
});
});
it('scopes search to the requested connection', async () => {
await seedProfile({
connectionId: 'warehouse',
syncId: 'sync-1',
columns: {
'orders.status': {
table: { catalog: null, db: 'public', name: 'orders' },
column: 'status',
nativeType: 'text',
normalizedType: 'string',
distinctCount: 3,
sampleValues: ['paid'],
},
},
});
await seedProfile({
connectionId: 'billing',
syncId: 'sync-2',
columns: {
'invoices.status': {
table: { catalog: null, db: 'public', name: 'invoices' },
column: 'status',
nativeType: 'text',
normalizedType: 'string',
distinctCount: 2,
sampleValues: ['paid'],
},
},
});
const service = createKtxDictionarySearchService(project);
await expect(service.search({ connectionId: 'billing', values: ['paid'] })).resolves.toMatchObject({
searched: [{ connectionId: 'billing', status: 'ready' }],
results: [
{
value: 'paid',
matches: [{ connectionId: 'billing', sourceName: 'invoices', columnName: 'status', matchedValue: 'paid' }],
misses: [],
},
],
});
});
});

View file

@ -0,0 +1,214 @@
import type { KtxLocalProject } from '../project/index.js';
import { loadLatestSlDictionaryEntries, type SlDictionaryEntry } from './sl-dictionary-profile.js';
export type KtxDictionarySearchStatus = 'ready' | 'no_profile_artifact' | 'no_candidate_columns';
export type KtxDictionarySearchMissReason = 'no_profile_artifact' | 'no_candidate_columns' | 'value_not_in_sample';
export interface KtxDictionarySearchInput {
values: string[];
connectionId?: string;
}
export interface KtxDictionarySearchCoverage {
sampledRows: number | null;
valuesPerColumn: number | null;
profiledColumns: number;
syncId: string | null;
profiledAt: string | null;
}
export interface KtxDictionarySearchSearchedConnection {
connectionId: string;
coverage: KtxDictionarySearchCoverage;
status: KtxDictionarySearchStatus;
}
export interface KtxDictionarySearchMatch {
connectionId: string;
sourceName: string;
columnName: string;
matchedValue: string;
cardinality: number | null;
}
export interface KtxDictionarySearchMiss {
connectionId: string;
reason: KtxDictionarySearchMissReason;
}
export interface KtxDictionarySearchValueResult {
value: string;
matches: KtxDictionarySearchMatch[];
misses: KtxDictionarySearchMiss[];
}
export interface KtxDictionarySearchResponse {
searched: KtxDictionarySearchSearchedConnection[];
results: KtxDictionarySearchValueResult[];
}
interface RelationshipProfileArtifact {
connectionId?: string;
profileSampleRows?: unknown;
sampleValuesPerColumn?: unknown;
profiledAt?: unknown;
extractedAt?: unknown;
}
function uniqueSorted(values: Iterable<string>): string[] {
return [...new Set([...values].filter((value) => value.trim().length > 0))].sort((left, right) =>
left.localeCompare(right),
);
}
function latestProfileSyncId(path: string): string | null {
const parts = path.split('/');
return parts.at(-3) ?? null;
}
function optionalNumber(value: unknown): number | null {
return typeof value === 'number' && Number.isFinite(value) ? value : null;
}
function optionalString(value: unknown): string | null {
return typeof value === 'string' && value.trim().length > 0 ? value : null;
}
async function latestProfilePath(project: KtxLocalProject, connectionId: string): Promise<string | null> {
const root = `raw-sources/${connectionId}/live-database`;
let files: string[];
try {
files = (await project.fileStore.listFiles(root)).files;
} catch {
return null;
}
return (
files
.filter((path) => path.endsWith('/enrichment/relationship-profile.json'))
.sort((left, right) => left.localeCompare(right))
.at(-1) ?? null
);
}
async function readProfile(project: KtxLocalProject, path: string): Promise<RelationshipProfileArtifact> {
const raw = await project.fileStore.readFile(path);
const parsed = JSON.parse(raw.content) as unknown;
return typeof parsed === 'object' && parsed !== null && !Array.isArray(parsed)
? (parsed as RelationshipProfileArtifact)
: {};
}
function profiledColumnCount(entries: readonly SlDictionaryEntry[]): number {
return new Set(entries.map((entry) => `${entry.sourceName}\u001f${entry.columnName}`)).size;
}
async function searchedConnection(
project: KtxLocalProject,
connectionId: string,
entries: readonly SlDictionaryEntry[],
): Promise<KtxDictionarySearchSearchedConnection> {
const path = await latestProfilePath(project, connectionId);
if (!path) {
return {
connectionId,
coverage: {
sampledRows: null,
valuesPerColumn: null,
profiledColumns: 0,
syncId: null,
profiledAt: null,
},
status: 'no_profile_artifact',
};
}
const profile = await readProfile(project, path);
const count = profiledColumnCount(entries);
return {
connectionId,
coverage: {
sampledRows: optionalNumber(profile.profileSampleRows),
valuesPerColumn: optionalNumber(profile.sampleValuesPerColumn),
profiledColumns: count,
syncId: latestProfileSyncId(path),
profiledAt: optionalString(profile.profiledAt) ?? optionalString(profile.extractedAt),
},
status: count > 0 ? 'ready' : 'no_candidate_columns',
};
}
function entryMatchesValue(entry: SlDictionaryEntry, value: string): boolean {
return entry.value.toLowerCase().includes(value.toLowerCase());
}
function toMatch(entry: SlDictionaryEntry): KtxDictionarySearchMatch {
return {
connectionId: entry.connectionId,
sourceName: entry.sourceName,
columnName: entry.columnName,
matchedValue: entry.value,
cardinality: entry.cardinality,
};
}
function sortMatches(matches: KtxDictionarySearchMatch[]): KtxDictionarySearchMatch[] {
return matches.sort(
(left, right) =>
left.connectionId.localeCompare(right.connectionId) ||
left.sourceName.localeCompare(right.sourceName) ||
left.columnName.localeCompare(right.columnName) ||
left.matchedValue.localeCompare(right.matchedValue),
);
}
function missReason(status: KtxDictionarySearchStatus): KtxDictionarySearchMissReason {
return status === 'ready' ? 'value_not_in_sample' : status;
}
export function createKtxDictionarySearchService(project: KtxLocalProject): {
search(input: KtxDictionarySearchInput): Promise<KtxDictionarySearchResponse>;
} {
return {
async search(input) {
const connectionIds = input.connectionId
? [input.connectionId]
: uniqueSorted(Object.keys(project.config.connections));
const entries = await loadLatestSlDictionaryEntries(project, connectionIds);
const entriesByConnection = new Map<string, SlDictionaryEntry[]>();
for (const connectionId of connectionIds) {
entriesByConnection.set(
connectionId,
entries.filter((entry) => entry.connectionId === connectionId),
);
}
const searched = (
await Promise.all(
connectionIds.map((connectionId) =>
searchedConnection(project, connectionId, entriesByConnection.get(connectionId) ?? []),
),
)
).sort((left, right) => left.connectionId.localeCompare(right.connectionId));
const searchedByConnection = new Map(searched.map((connection) => [connection.connectionId, connection]));
return {
searched,
results: input.values.map((value) => {
const matches = sortMatches(entries.filter((entry) => entryMatchesValue(entry, value)).map(toMatch));
const matchedConnections = new Set(matches.map((match) => match.connectionId));
return {
value,
matches,
misses: searched
.filter((connection) => !matchedConnections.has(connection.connectionId))
.map((connection) => ({
connectionId: connection.connectionId,
reason: missReason(searchedByConnection.get(connection.connectionId)?.status ?? 'no_profile_artifact'),
})),
};
}),
};
},
};
}

View file

@ -25,6 +25,18 @@ export {
} from './semantic-layer.service.js';
export { loadLatestSlDictionaryEntries } from './sl-dictionary-profile.js';
export type { SlDictionaryEntry } from './sl-dictionary-profile.js';
export { createKtxDictionarySearchService } from './dictionary-search.js';
export type {
KtxDictionarySearchCoverage,
KtxDictionarySearchInput,
KtxDictionarySearchMatch,
KtxDictionarySearchMiss,
KtxDictionarySearchMissReason,
KtxDictionarySearchResponse,
KtxDictionarySearchSearchedConnection,
KtxDictionarySearchStatus,
KtxDictionarySearchValueResult,
} from './dictionary-search.js';
export { buildSemanticLayerSourceSearchText, SlSearchService } from './sl-search.service.js';
export { SqliteSlSourcesIndex, type SqliteSlSourcesIndexOptions } from './sqlite-sl-sources-index.js';
export * from './local-sl.js';

View file

@ -108,6 +108,44 @@ describe('createHttpSqlAnalysisPort', () => {
});
});
it('maps read-only SQL validation responses', async () => {
const requests: Array<{ path: string; payload: Record<string, unknown> }> = [];
const port = createHttpSqlAnalysisPort({
baseUrl: 'http://127.0.0.1:8765',
requestJson: async (path, payload) => {
requests.push({ path, payload });
return { ok: false, error: 'SQL contains read/write operation: Insert' };
},
});
await expect(
port.validateReadOnly('with x as (insert into t values (1)) select * from x', 'postgres'),
).resolves.toEqual({
ok: false,
error: 'SQL contains read/write operation: Insert',
});
expect(requests).toEqual([
{
path: '/sql/validate-read-only',
payload: {
dialect: 'postgres',
sql: 'with x as (insert into t values (1)) select * from x',
},
},
]);
});
it('rejects malformed read-only validation responses', async () => {
const port = createHttpSqlAnalysisPort({
baseUrl: 'http://127.0.0.1:8765',
requestJson: async () => ({ ok: 'yes' }),
});
await expect(port.validateReadOnly('select 1', 'postgres')).rejects.toThrow(
'sql analysis response is missing boolean field ok',
);
});
it('rejects malformed SQL batch responses instead of inventing defaults', async () => {
const requestJson = vi.fn(async () => ({
results: {

View file

@ -9,6 +9,7 @@ import type {
SqlAnalysisLiteralSlot,
SqlAnalysisLiteralSlotType,
SqlAnalysisPort,
SqlReadOnlyValidationResult,
} from './ports.js';
export type KtxSqlAnalysisHttpJsonRunner = (
@ -96,6 +97,14 @@ function requiredStringArray(raw: Record<string, unknown>, field: string): strin
return value;
}
function requiredBoolean(raw: Record<string, unknown>, field: string): boolean {
const value = raw[field];
if (typeof value !== 'boolean') {
throw new Error(`sql analysis response is missing boolean field ${field}`);
}
return value;
}
function requiredObject(raw: Record<string, unknown>, field: string): Record<string, unknown> {
const value = raw[field];
if (!value || typeof value !== 'object' || Array.isArray(value)) {
@ -187,6 +196,14 @@ function mapBatchResponse(raw: Record<string, unknown>): Map<string, SqlAnalysis
);
}
function mapReadOnlyValidation(raw: Record<string, unknown>): SqlReadOnlyValidationResult {
const error = optionalString(raw, 'error');
return {
ok: requiredBoolean(raw, 'ok'),
...(error !== undefined ? { error } : {}),
};
}
export function createHttpSqlAnalysisPort(options: HttpSqlAnalysisPortOptions): SqlAnalysisPort {
const requestJson = options.requestJson ?? postJson(options.baseUrl);
@ -205,5 +222,12 @@ export function createHttpSqlAnalysisPort(options: HttpSqlAnalysisPortOptions):
});
return mapBatchResponse(raw);
},
async validateReadOnly(sql: string, dialect: SqlAnalysisDialect) {
const raw = await requestJson('/sql/validate-read-only', {
dialect,
sql,
});
return mapReadOnlyValidation(raw);
},
};
}

View file

@ -9,4 +9,5 @@ export type {
SqlAnalysisLiteralSlot,
SqlAnalysisLiteralSlotType,
SqlAnalysisPort,
SqlReadOnlyValidationResult,
} from './ports.js';

View file

@ -38,10 +38,16 @@ export interface SqlAnalysisBatchResult {
error?: string | null;
}
export interface SqlReadOnlyValidationResult {
ok: boolean;
error?: string | null;
}
export interface SqlAnalysisPort {
analyzeForFingerprint(sql: string, dialect: SqlAnalysisDialect): Promise<SqlAnalysisFingerprintResult>;
analyzeBatch(
items: SqlAnalysisBatchItem[],
dialect: SqlAnalysisDialect,
): Promise<Map<string, SqlAnalysisBatchResult>>;
validateReadOnly(sql: string, dialect: SqlAnalysisDialect): Promise<SqlReadOnlyValidationResult>;
}