mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-28 08:49:38 +02:00
feat(mcp):added MCP server (#97)
* docs(specs): design research-agent MCP tools and ktx mcp daemon Adds the 2026-05-14 design spec for exposing four new MCP tools (discover_data, entity_details, dictionary_search, sql_execution), shipping a ktx-research skill, and introducing an HTTP-only ktx mcp daemon so external agents can use KTX as a research-capable context layer. * Refine research-agent MCP tools spec after adversarial review iteration 1 * Refine research-agent MCP tools spec after adversarial review iteration 2 * Refine research-agent MCP tools spec after adversarial review iteration 3 * Refine spec: drop connectionName compat carve-out and ground summary/snippet provenance per kind * feat(daemon): validate read-only SQL with sqlglot * feat(context): expose read-only SQL validation port * feat(context): register MCP sql execution tool * feat(context): execute MCP SQL through validated connector path * test(context): update SQL analysis port fixtures * docs: add research-agent MCP sql execution foundation plan * feat(context): add scan-backed entity details service * feat(context): register MCP entity details tool * feat(context): expose local MCP entity details * test(context): align entity details scan fixtures * docs: add research-agent MCP entity_details plan * feat(context): add dictionary search service * feat(context): register MCP dictionary search tool * feat(context): expose local MCP dictionary search * docs: add research-agent MCP dictionary_search plan * feat: add MCP discover data service * feat: expose discover data MCP tool * feat: wire local discover data MCP port * docs: add research-agent MCP discover_data plan * feat(cli): add mcp http security helpers * feat(cli): host mcp over streamable http * feat(cli): manage mcp daemon lifecycle * feat(cli): add ktx mcp commands * fix(cli): stabilize mcp daemon verification * docs: add research-agent MCP http daemon plan * feat(cli): install KTX research skill * feat(cli): configure MCP clients in setup agents * feat(cli): support Claude local MCP setup scope * docs: add research-agent MCP setup-agents plan * refactor(context): use connectionId in warehouse verification tools * docs(context): update ingest verification prompts for connectionId * docs: add research-agent MCP ingest contract convergence plan * chore: build runtime artifacts in conductor setup --------- Co-authored-by: Andrey Avtomonov <7889985+andreybavt@users.noreply.github.com>
This commit is contained in:
parent
c7b64379bf
commit
b759a4a286
78 changed files with 13689 additions and 190 deletions
|
|
@ -143,6 +143,45 @@ const scanArtifactReadSchema = z.object({
|
|||
path: z.string().min(1),
|
||||
});
|
||||
|
||||
const entityDetailsTableRefSchema = z.object({
|
||||
catalog: z.string().nullable(),
|
||||
db: z.string().nullable(),
|
||||
name: z.string().min(1),
|
||||
});
|
||||
|
||||
const entityDetailsSchema = z.object({
|
||||
connectionId: connectionIdSchema,
|
||||
entities: z
|
||||
.array(
|
||||
z.object({
|
||||
table: z.union([z.string().min(1), entityDetailsTableRefSchema]),
|
||||
columns: z.array(z.string().min(1)).optional(),
|
||||
}),
|
||||
)
|
||||
.min(1)
|
||||
.max(20),
|
||||
});
|
||||
|
||||
const dictionarySearchSchema = z.object({
|
||||
values: z.array(z.string().min(1)).min(1).max(20),
|
||||
connectionId: connectionIdSchema.optional(),
|
||||
});
|
||||
|
||||
const discoverDataKindSchema = z.enum(['wiki', 'sl_source', 'sl_measure', 'sl_dimension', 'table', 'column']);
|
||||
|
||||
const discoverDataSchema = z.object({
|
||||
query: z.string().min(1),
|
||||
connectionId: connectionIdSchema.optional(),
|
||||
kinds: z.array(discoverDataKindSchema).optional(),
|
||||
limit: z.number().int().min(1).max(50).default(15).optional(),
|
||||
});
|
||||
|
||||
const sqlExecutionSchema = z.object({
|
||||
connectionId: connectionIdSchema,
|
||||
sql: z.string().min(1),
|
||||
maxRows: z.number().int().min(1).max(10_000).default(1000).optional(),
|
||||
});
|
||||
|
||||
export function jsonToolResult<T extends object>(structuredContent: T): KtxMcpToolResult<T> {
|
||||
return {
|
||||
content: [{ type: 'text', text: JSON.stringify(structuredContent, null, 2) }],
|
||||
|
|
@ -361,6 +400,81 @@ export function registerKtxContextTools(deps: RegisterKtxContextToolsDeps): void
|
|||
);
|
||||
}
|
||||
|
||||
if (ports.entityDetails) {
|
||||
const entityDetails = ports.entityDetails;
|
||||
registerParsedTool(
|
||||
server,
|
||||
'entity_details',
|
||||
{
|
||||
title: 'Entity Details',
|
||||
description: 'Read raw table and column metadata from the latest KTX live-database scan snapshot.',
|
||||
inputSchema: entityDetailsSchema.shape,
|
||||
},
|
||||
entityDetailsSchema,
|
||||
async (input) => jsonToolResult(await entityDetails.read(input)),
|
||||
);
|
||||
}
|
||||
|
||||
if (ports.dictionarySearch) {
|
||||
const dictionarySearch = ports.dictionarySearch;
|
||||
registerParsedTool(
|
||||
server,
|
||||
'dictionary_search',
|
||||
{
|
||||
title: 'Dictionary Search',
|
||||
description:
|
||||
'Search profile-sampled warehouse values and report matching connection/source/column locations plus non-authoritative miss reasons.',
|
||||
inputSchema: dictionarySearchSchema.shape,
|
||||
},
|
||||
dictionarySearchSchema,
|
||||
async (input) => jsonToolResult(await dictionarySearch.search(input)),
|
||||
);
|
||||
}
|
||||
|
||||
if (ports.discover) {
|
||||
const discover = ports.discover;
|
||||
registerParsedTool(
|
||||
server,
|
||||
'discover_data',
|
||||
{
|
||||
title: 'Discover Data',
|
||||
description:
|
||||
'Search across KTX wiki pages, semantic-layer sources/measures/dimensions, and raw warehouse schema refs.',
|
||||
inputSchema: discoverDataSchema.shape,
|
||||
},
|
||||
discoverDataSchema,
|
||||
async (input) => jsonToolResult(await discover.search(input)),
|
||||
);
|
||||
}
|
||||
|
||||
if (ports.sqlExecution) {
|
||||
const sqlExecution = ports.sqlExecution;
|
||||
registerParsedTool(
|
||||
server,
|
||||
'sql_execution',
|
||||
{
|
||||
title: 'SQL Execution',
|
||||
description:
|
||||
'Execute one parser-validated read-only SQL query against a configured KTX connection and return structured rows.',
|
||||
inputSchema: sqlExecutionSchema.shape,
|
||||
},
|
||||
sqlExecutionSchema,
|
||||
async (input) => {
|
||||
try {
|
||||
return jsonToolResult(
|
||||
await sqlExecution.execute({
|
||||
connectionId: input.connectionId,
|
||||
sql: input.sql,
|
||||
maxRows: input.maxRows ?? 1000,
|
||||
}),
|
||||
);
|
||||
} catch (error) {
|
||||
return jsonErrorToolResult(error instanceof Error ? error.message : String(error));
|
||||
}
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
if (ports.ingest) {
|
||||
const ingest = ports.ingest;
|
||||
registerParsedTool(
|
||||
|
|
|
|||
|
|
@ -5,6 +5,9 @@ export { createDefaultKtxMcpServer, createKtxMcpServer } from './server.js';
|
|||
export type {
|
||||
KtxConnectionSummary,
|
||||
KtxConnectionsMcpPort,
|
||||
KtxDiscoverDataMcpPort,
|
||||
KtxDictionarySearchMcpPort,
|
||||
KtxEntityDetailsMcpPort,
|
||||
KtxIngestDiffSummary,
|
||||
KtxIngestMcpPort,
|
||||
KtxIngestStatusResponse,
|
||||
|
|
|
|||
|
|
@ -5,7 +5,12 @@ import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
|||
import { AgentRunnerService } from '../agent/index.js';
|
||||
import { FakeSourceAdapter, type MemoryFlowReplayInput } from '../ingest/index.js';
|
||||
import { initKtxProject } from '../project/index.js';
|
||||
import { createKtxConnectorCapabilities, type KtxScanConnector, type KtxSchemaSnapshot } from '../scan/index.js';
|
||||
import {
|
||||
createKtxConnectorCapabilities,
|
||||
type KtxQueryResult,
|
||||
type KtxScanConnector,
|
||||
type KtxSchemaSnapshot,
|
||||
} from '../scan/index.js';
|
||||
import { writeLocalSlSource } from '../sl/index.js';
|
||||
import { createLocalProjectMcpContextPorts } from './local-project-ports.js';
|
||||
|
||||
|
|
@ -60,16 +65,119 @@ describe('createLocalProjectMcpContextPorts', () => {
|
|||
};
|
||||
}
|
||||
|
||||
function testConnector(snapshot = testSnapshot()): KtxScanConnector {
|
||||
function testConnector(snapshot = testSnapshot(), queryResult?: KtxQueryResult): KtxScanConnector {
|
||||
return {
|
||||
id: `test:${snapshot.connectionId}`,
|
||||
driver: snapshot.driver,
|
||||
capabilities: createKtxConnectorCapabilities(),
|
||||
capabilities: createKtxConnectorCapabilities({ readOnlySql: queryResult !== undefined }),
|
||||
introspect: vi.fn(async () => snapshot),
|
||||
executeReadOnly: queryResult === undefined ? undefined : vi.fn(async () => queryResult),
|
||||
cleanup: vi.fn(async () => {}),
|
||||
};
|
||||
}
|
||||
|
||||
async function seedScanReport(projectDir: string, syncId = 'sync-1'): Promise<void> {
|
||||
const root = `raw-sources/warehouse/live-database/${syncId}`;
|
||||
await mkdir(join(projectDir, root, 'tables'), { recursive: true });
|
||||
await writeFile(
|
||||
join(projectDir, root, 'connection.json'),
|
||||
JSON.stringify(
|
||||
{
|
||||
connectionId: 'warehouse',
|
||||
driver: 'postgres',
|
||||
extractedAt: '2026-05-14T09:00:00.000Z',
|
||||
scope: { schemas: ['public'] },
|
||||
},
|
||||
null,
|
||||
2,
|
||||
),
|
||||
'utf-8',
|
||||
);
|
||||
await writeFile(
|
||||
join(projectDir, root, 'tables', 'orders.json'),
|
||||
JSON.stringify(
|
||||
{
|
||||
catalog: null,
|
||||
db: 'public',
|
||||
name: 'orders',
|
||||
kind: 'table',
|
||||
comment: 'Customer orders',
|
||||
estimatedRows: 12,
|
||||
columns: [
|
||||
{
|
||||
name: 'id',
|
||||
nativeType: 'integer',
|
||||
normalizedType: 'integer',
|
||||
dimensionType: 'number',
|
||||
nullable: false,
|
||||
primaryKey: true,
|
||||
comment: null,
|
||||
},
|
||||
],
|
||||
foreignKeys: [],
|
||||
},
|
||||
null,
|
||||
2,
|
||||
),
|
||||
'utf-8',
|
||||
);
|
||||
await writeFile(
|
||||
join(projectDir, root, 'scan-report.json'),
|
||||
JSON.stringify(
|
||||
{
|
||||
connectionId: 'warehouse',
|
||||
driver: 'postgres',
|
||||
syncId,
|
||||
runId: 'scan-1',
|
||||
trigger: 'mcp',
|
||||
mode: 'structural',
|
||||
dryRun: false,
|
||||
artifactPaths: {
|
||||
rawSourcesDir: root,
|
||||
reportPath: `${root}/scan-report.json`,
|
||||
manifestShards: [],
|
||||
enrichmentArtifacts: [],
|
||||
},
|
||||
diffSummary: {
|
||||
tablesAdded: 0,
|
||||
tablesModified: 0,
|
||||
tablesDeleted: 0,
|
||||
tablesUnchanged: 1,
|
||||
columnsAdded: 0,
|
||||
columnsModified: 0,
|
||||
columnsDeleted: 0,
|
||||
},
|
||||
manifestShardsWritten: 0,
|
||||
structuralSyncStats: {
|
||||
tablesCreated: 1,
|
||||
tablesUpdated: 0,
|
||||
tablesDeleted: 0,
|
||||
columnsCreated: 0,
|
||||
columnsUpdated: 0,
|
||||
columnsDeleted: 0,
|
||||
},
|
||||
enrichment: {
|
||||
dataDictionary: 'skipped',
|
||||
tableDescriptions: 'skipped',
|
||||
columnDescriptions: 'skipped',
|
||||
embeddings: 'skipped',
|
||||
deterministicRelationships: 'skipped',
|
||||
llmRelationshipValidation: 'skipped',
|
||||
statisticalValidation: 'skipped',
|
||||
},
|
||||
capabilityGaps: [],
|
||||
warnings: [],
|
||||
relationships: { accepted: 0, review: 0, rejected: 0, skipped: 0 },
|
||||
enrichmentState: { resumedStages: [], completedStages: [], failedStages: [] },
|
||||
createdAt: '2026-05-14T09:00:00.000Z',
|
||||
},
|
||||
null,
|
||||
2,
|
||||
),
|
||||
'utf-8',
|
||||
);
|
||||
}
|
||||
|
||||
it('lists local project connections from ktx.yaml', async () => {
|
||||
const project = await initKtxProject({ projectDir: tempDir });
|
||||
project.config.connections.warehouse = {
|
||||
|
|
@ -119,6 +227,382 @@ describe('createLocalProjectMcpContextPorts', () => {
|
|||
expect(connector.cleanup).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('executes MCP SQL only after parser-backed validation passes', async () => {
|
||||
const project = await initKtxProject({ projectDir: tempDir });
|
||||
project.config.connections.warehouse = {
|
||||
driver: 'postgres',
|
||||
url: 'env:DATABASE_URL',
|
||||
};
|
||||
const connector = testConnector(testSnapshot(), {
|
||||
headers: ['id'],
|
||||
headerTypes: ['integer'],
|
||||
rows: [[1]],
|
||||
totalRows: 1,
|
||||
rowCount: 1,
|
||||
});
|
||||
const createConnector = vi.fn(async () => connector);
|
||||
const sqlAnalysis = {
|
||||
analyzeForFingerprint: vi.fn(),
|
||||
analyzeBatch: vi.fn(),
|
||||
validateReadOnly: vi.fn(async () => ({ ok: true, error: null })),
|
||||
};
|
||||
const ports = createLocalProjectMcpContextPorts(project, {
|
||||
sqlAnalysis,
|
||||
localScan: {
|
||||
createConnector,
|
||||
},
|
||||
});
|
||||
|
||||
await expect(
|
||||
ports.sqlExecution?.execute({
|
||||
connectionId: 'warehouse',
|
||||
sql: 'select id from public.orders',
|
||||
maxRows: 5,
|
||||
}),
|
||||
).resolves.toEqual({
|
||||
headers: ['id'],
|
||||
headerTypes: ['integer'],
|
||||
rows: [[1]],
|
||||
rowCount: 1,
|
||||
});
|
||||
expect(sqlAnalysis.validateReadOnly).toHaveBeenCalledWith('select id from public.orders', 'postgres');
|
||||
expect(createConnector).toHaveBeenCalledWith('warehouse');
|
||||
expect(connector.executeReadOnly).toHaveBeenCalledWith(
|
||||
{
|
||||
connectionId: 'warehouse',
|
||||
sql: 'select id from public.orders',
|
||||
maxRows: 5,
|
||||
},
|
||||
{ runId: 'mcp-sql-execution' },
|
||||
);
|
||||
expect(connector.cleanup).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('rejects MCP SQL before connector execution when parser validation fails', async () => {
|
||||
const project = await initKtxProject({ projectDir: tempDir });
|
||||
project.config.connections.warehouse = {
|
||||
driver: 'postgres',
|
||||
url: 'env:DATABASE_URL',
|
||||
};
|
||||
const connector = testConnector(testSnapshot(), {
|
||||
headers: ['id'],
|
||||
rows: [[1]],
|
||||
totalRows: 1,
|
||||
rowCount: 1,
|
||||
});
|
||||
const sqlAnalysis = {
|
||||
analyzeForFingerprint: vi.fn(),
|
||||
analyzeBatch: vi.fn(),
|
||||
validateReadOnly: vi.fn(async () => ({
|
||||
ok: false,
|
||||
error: 'SQL contains read/write operation: Insert',
|
||||
})),
|
||||
};
|
||||
const ports = createLocalProjectMcpContextPorts(project, {
|
||||
sqlAnalysis,
|
||||
localScan: {
|
||||
createConnector: vi.fn(async () => connector),
|
||||
},
|
||||
});
|
||||
|
||||
await expect(
|
||||
ports.sqlExecution?.execute({
|
||||
connectionId: 'warehouse',
|
||||
sql: 'with x as (insert into t values (1) returning *) select * from x',
|
||||
maxRows: 1000,
|
||||
}),
|
||||
).rejects.toThrow('SQL contains read/write operation: Insert');
|
||||
expect(connector.executeReadOnly).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('exposes local scan entity details through MCP ports', async () => {
|
||||
const project = await initKtxProject({ projectDir: tempDir });
|
||||
project.config.connections.warehouse = {
|
||||
driver: 'postgres',
|
||||
url: 'env:DATABASE_URL',
|
||||
};
|
||||
await seedScanReport(project.projectDir);
|
||||
const ports = createLocalProjectMcpContextPorts(project);
|
||||
|
||||
await expect(
|
||||
ports.entityDetails?.read({
|
||||
connectionId: 'warehouse',
|
||||
entities: [{ table: 'public.orders', columns: ['id'] }],
|
||||
}),
|
||||
).resolves.toMatchObject({
|
||||
results: [
|
||||
{
|
||||
ok: true,
|
||||
connectionId: 'warehouse',
|
||||
display: 'public.orders',
|
||||
columns: [{ name: 'id', nativeType: 'integer' }],
|
||||
snapshot: { syncId: 'sync-1', scanRunId: 'scan-1' },
|
||||
},
|
||||
],
|
||||
});
|
||||
});
|
||||
|
||||
it('returns a structured local entity details error when no scan exists', async () => {
|
||||
const project = await initKtxProject({ projectDir: tempDir });
|
||||
project.config.connections.warehouse = {
|
||||
driver: 'postgres',
|
||||
url: 'env:DATABASE_URL',
|
||||
};
|
||||
const ports = createLocalProjectMcpContextPorts(project);
|
||||
|
||||
await expect(
|
||||
ports.entityDetails?.read({
|
||||
connectionId: 'warehouse',
|
||||
entities: [{ table: 'public.orders' }],
|
||||
}),
|
||||
).resolves.toMatchObject({
|
||||
results: [
|
||||
{
|
||||
ok: false,
|
||||
connectionId: 'warehouse',
|
||||
error: { code: 'scan_missing' },
|
||||
},
|
||||
],
|
||||
});
|
||||
});
|
||||
|
||||
it('exposes local dictionary search through MCP ports', async () => {
|
||||
const project = await initKtxProject({ projectDir: tempDir });
|
||||
project.config.connections.warehouse = {
|
||||
driver: 'postgres',
|
||||
url: 'env:DATABASE_URL',
|
||||
};
|
||||
await project.fileStore.writeFile(
|
||||
'raw-sources/warehouse/live-database/sync-1/enrichment/relationship-profile.json',
|
||||
`${JSON.stringify(
|
||||
{
|
||||
connectionId: 'warehouse',
|
||||
driver: 'postgres',
|
||||
sqlAvailable: true,
|
||||
queryCount: 4,
|
||||
tables: [],
|
||||
columns: {
|
||||
'orders.status': {
|
||||
table: { catalog: null, db: 'public', name: 'orders' },
|
||||
column: 'status',
|
||||
nativeType: 'text',
|
||||
normalizedType: 'string',
|
||||
distinctCount: 2,
|
||||
sampleValues: ['paid', 'refunded'],
|
||||
},
|
||||
},
|
||||
warnings: [],
|
||||
},
|
||||
null,
|
||||
2,
|
||||
)}\n`,
|
||||
'ktx',
|
||||
'ktx@example.com',
|
||||
'Seed dictionary profile',
|
||||
);
|
||||
|
||||
const ports = createLocalProjectMcpContextPorts(project);
|
||||
|
||||
await expect(ports.dictionarySearch?.search({ values: ['paid'] })).resolves.toMatchObject({
|
||||
searched: [{ connectionId: 'warehouse', status: 'ready' }],
|
||||
results: [
|
||||
{
|
||||
value: 'paid',
|
||||
matches: [{ connectionId: 'warehouse', sourceName: 'orders', columnName: 'status', matchedValue: 'paid' }],
|
||||
misses: [],
|
||||
},
|
||||
],
|
||||
});
|
||||
});
|
||||
|
||||
it('reports missing local dictionary profiles through MCP ports', async () => {
|
||||
const project = await initKtxProject({ projectDir: tempDir });
|
||||
project.config.connections.warehouse = {
|
||||
driver: 'postgres',
|
||||
url: 'env:DATABASE_URL',
|
||||
};
|
||||
|
||||
const ports = createLocalProjectMcpContextPorts(project);
|
||||
|
||||
await expect(ports.dictionarySearch?.search({ values: ['paid'] })).resolves.toEqual({
|
||||
searched: [
|
||||
{
|
||||
connectionId: 'warehouse',
|
||||
coverage: {
|
||||
sampledRows: null,
|
||||
valuesPerColumn: null,
|
||||
profiledColumns: 0,
|
||||
syncId: null,
|
||||
profiledAt: null,
|
||||
},
|
||||
status: 'no_profile_artifact',
|
||||
},
|
||||
],
|
||||
results: [
|
||||
{
|
||||
value: 'paid',
|
||||
matches: [],
|
||||
misses: [{ connectionId: 'warehouse', reason: 'no_profile_artifact' }],
|
||||
},
|
||||
],
|
||||
});
|
||||
});
|
||||
|
||||
it('exposes local project discover_data across wiki, semantic-layer, and raw schema', async () => {
|
||||
const project = await initKtxProject({ projectDir: tempDir });
|
||||
project.config.connections.warehouse = {
|
||||
driver: 'postgres',
|
||||
url: 'env:DATABASE_URL',
|
||||
};
|
||||
await project.fileStore.writeFile(
|
||||
'wiki/global/orders-playbook.md',
|
||||
[
|
||||
'---',
|
||||
'summary: Paid order operations',
|
||||
'tags: [orders]',
|
||||
'refs: []',
|
||||
'sl_refs: []',
|
||||
'usage_mode: auto',
|
||||
'---',
|
||||
'',
|
||||
'Paid orders are used for customer activity analysis.',
|
||||
'',
|
||||
].join('\n'),
|
||||
'ktx',
|
||||
'ktx@example.com',
|
||||
'seed wiki',
|
||||
);
|
||||
await project.fileStore.writeFile(
|
||||
'semantic-layer/warehouse/orders.yaml',
|
||||
[
|
||||
'name: orders',
|
||||
'descriptions:',
|
||||
' user: Paid order facts',
|
||||
'table: public.orders',
|
||||
'grain: [id]',
|
||||
'columns:',
|
||||
' - name: status',
|
||||
' type: string',
|
||||
' descriptions:',
|
||||
' user: Payment status',
|
||||
'measures:',
|
||||
' - name: order_count',
|
||||
' expr: count(*)',
|
||||
' description: Number of paid orders',
|
||||
'',
|
||||
].join('\n'),
|
||||
'ktx',
|
||||
'ktx@example.com',
|
||||
'seed sl',
|
||||
);
|
||||
await project.fileStore.writeFile(
|
||||
'raw-sources/warehouse/live-database/sync-1/connection.json',
|
||||
JSON.stringify({ connectionId: 'warehouse', driver: 'postgres', extractedAt: '2026-05-14T09:00:00.000Z' }, null, 2),
|
||||
'ktx',
|
||||
'ktx@example.com',
|
||||
'seed connection',
|
||||
);
|
||||
await project.fileStore.writeFile(
|
||||
'raw-sources/warehouse/live-database/sync-1/tables/public-orders.json',
|
||||
JSON.stringify(
|
||||
{
|
||||
catalog: null,
|
||||
db: 'public',
|
||||
name: 'orders',
|
||||
kind: 'table',
|
||||
comment: 'Orders table',
|
||||
estimatedRows: 10,
|
||||
columns: [
|
||||
{
|
||||
name: 'status',
|
||||
nativeType: 'text',
|
||||
normalizedType: 'text',
|
||||
dimensionType: 'string',
|
||||
nullable: false,
|
||||
primaryKey: false,
|
||||
comment: 'Order status',
|
||||
sampleValues: ['paid'],
|
||||
},
|
||||
],
|
||||
foreignKeys: [],
|
||||
},
|
||||
null,
|
||||
2,
|
||||
),
|
||||
'ktx',
|
||||
'ktx@example.com',
|
||||
'seed table',
|
||||
);
|
||||
await project.fileStore.writeFile(
|
||||
'raw-sources/warehouse/live-database/sync-1/scan-report.json',
|
||||
JSON.stringify(
|
||||
{
|
||||
connectionId: 'warehouse',
|
||||
driver: 'postgres',
|
||||
syncId: 'sync-1',
|
||||
runId: 'scan-1',
|
||||
trigger: 'mcp',
|
||||
mode: 'enriched',
|
||||
dryRun: false,
|
||||
artifactPaths: {
|
||||
rawSourcesDir: 'raw-sources/warehouse/live-database/sync-1',
|
||||
reportPath: 'raw-sources/warehouse/live-database/sync-1/scan-report.json',
|
||||
manifestShards: [],
|
||||
enrichmentArtifacts: [],
|
||||
},
|
||||
diffSummary: {
|
||||
tablesAdded: 1,
|
||||
tablesModified: 0,
|
||||
tablesDeleted: 0,
|
||||
tablesUnchanged: 0,
|
||||
columnsAdded: 0,
|
||||
columnsModified: 0,
|
||||
columnsDeleted: 0,
|
||||
},
|
||||
manifestShardsWritten: 0,
|
||||
structuralSyncStats: {
|
||||
tablesCreated: 0,
|
||||
tablesUpdated: 0,
|
||||
tablesDeleted: 0,
|
||||
columnsCreated: 0,
|
||||
columnsUpdated: 0,
|
||||
columnsDeleted: 0,
|
||||
},
|
||||
enrichment: {
|
||||
dataDictionary: 'completed',
|
||||
tableDescriptions: 'completed',
|
||||
columnDescriptions: 'completed',
|
||||
embeddings: 'skipped',
|
||||
deterministicRelationships: 'skipped',
|
||||
llmRelationshipValidation: 'skipped',
|
||||
statisticalValidation: 'skipped',
|
||||
},
|
||||
capabilityGaps: [],
|
||||
warnings: [],
|
||||
relationships: { accepted: 0, review: 0, rejected: 0, skipped: 0 },
|
||||
enrichmentState: { resumedStages: [], completedStages: [], failedStages: [] },
|
||||
createdAt: '2026-05-14T09:00:00.000Z',
|
||||
},
|
||||
null,
|
||||
2,
|
||||
),
|
||||
'ktx',
|
||||
'ktx@example.com',
|
||||
'seed scan report',
|
||||
);
|
||||
|
||||
const ports = createLocalProjectMcpContextPorts(project);
|
||||
const results = await ports.discover?.search({ query: 'paid orders', connectionId: 'warehouse', limit: 10 });
|
||||
|
||||
expect(results).toEqual(
|
||||
expect.arrayContaining([
|
||||
expect.objectContaining({ kind: 'wiki', id: 'orders-playbook' }),
|
||||
expect.objectContaining({ kind: 'sl_source', id: 'orders', connectionId: 'warehouse' }),
|
||||
expect.objectContaining({ kind: 'table', id: 'public.orders', connectionId: 'warehouse' }),
|
||||
]),
|
||||
);
|
||||
});
|
||||
|
||||
it('triggers canonical bundle ingest and reads status, report, and replay through MCP ports', async () => {
|
||||
const project = await initKtxProject({ projectDir: tempDir });
|
||||
project.config.connections.warehouse = {
|
||||
|
|
|
|||
|
|
@ -18,6 +18,7 @@ import {
|
|||
import { createLocalKtxEmbeddingProviderFromConfig, KtxIngestEmbeddingPortAdapter } from '../llm/index.js';
|
||||
import type { KtxLocalProject } from '../project/index.js';
|
||||
import {
|
||||
createKtxEntityDetailsService,
|
||||
getLocalScanReport,
|
||||
getLocalScanStatus,
|
||||
type KtxConnectionDriver,
|
||||
|
|
@ -26,8 +27,11 @@ import {
|
|||
type LocalScanMcpOptions,
|
||||
runLocalScan,
|
||||
} from '../scan/index.js';
|
||||
import { createKtxDiscoverDataService } from '../search/index.js';
|
||||
import type { SqlAnalysisDialect, SqlAnalysisPort } from '../sql-analysis/index.js';
|
||||
import {
|
||||
compileLocalSlQuery,
|
||||
createKtxDictionarySearchService,
|
||||
type LocalSlSourceSearchResult,
|
||||
type LocalSlSourceSummary,
|
||||
listLocalSlSources,
|
||||
|
|
@ -44,6 +48,7 @@ import type {
|
|||
KtxScanArtifactReadResponse,
|
||||
KtxScanArtifactSummary,
|
||||
KtxScanArtifactType,
|
||||
KtxSqlExecutionResponse,
|
||||
} from './types.js';
|
||||
|
||||
const LOCAL_AUTHOR = 'ktx';
|
||||
|
|
@ -53,6 +58,7 @@ const SL_SHAPE_WARNING = 'Local stdio validation checks YAML shape only; Python
|
|||
interface CreateLocalProjectMcpContextPortsOptions {
|
||||
semanticLayerCompute?: KtxSemanticLayerComputePort;
|
||||
queryExecutor?: KtxSqlQueryExecutorPort;
|
||||
sqlAnalysis?: SqlAnalysisPort;
|
||||
localIngest?: LocalIngestMcpOptions;
|
||||
localScan?: LocalScanMcpOptions;
|
||||
embeddingService?: KtxEmbeddingPort | null;
|
||||
|
|
@ -77,6 +83,10 @@ function dialectForDriver(driver: string | undefined): string {
|
|||
return map[normalized] ?? 'postgres';
|
||||
}
|
||||
|
||||
function sqlAnalysisDialectForDriver(driver: string | undefined): SqlAnalysisDialect {
|
||||
return dialectForDriver(driver) as SqlAnalysisDialect;
|
||||
}
|
||||
|
||||
function assertSafePathToken(kind: string, value: string): string {
|
||||
if (
|
||||
value.trim().length === 0 ||
|
||||
|
|
@ -378,6 +388,53 @@ function statusFromIngestReport(report: IngestReportSnapshot): KtxIngestStatusRe
|
|||
};
|
||||
}
|
||||
|
||||
async function executeValidatedReadOnlySql(
|
||||
project: KtxLocalProject,
|
||||
options: CreateLocalProjectMcpContextPortsOptions,
|
||||
input: { connectionId: string; sql: string; maxRows: number },
|
||||
): Promise<KtxSqlExecutionResponse> {
|
||||
const connectionId = assertSafeConnectionId(input.connectionId);
|
||||
const connection = project.config.connections[connectionId];
|
||||
if (!connection) {
|
||||
throw new Error(`Connection "${connectionId}" is not configured in ktx.yaml`);
|
||||
}
|
||||
if (!options.sqlAnalysis) {
|
||||
throw new Error('sql_execution requires parser-backed SQL validation.');
|
||||
}
|
||||
const validation = await options.sqlAnalysis.validateReadOnly(input.sql, sqlAnalysisDialectForDriver(connection.driver));
|
||||
if (!validation.ok) {
|
||||
throw new Error(validation.error ?? 'SQL is not read-only.');
|
||||
}
|
||||
const createConnector = options.localScan?.createConnector;
|
||||
if (!createConnector) {
|
||||
throw new Error('sql_execution requires a local scan connector factory.');
|
||||
}
|
||||
|
||||
let connector: KtxScanConnector | null = null;
|
||||
try {
|
||||
connector = await createConnector(connectionId);
|
||||
if (!connector.capabilities.readOnlySql || !connector.executeReadOnly) {
|
||||
throw new Error(`Connection "${connectionId}" does not support read-only SQL execution.`);
|
||||
}
|
||||
const result = await connector.executeReadOnly(
|
||||
{
|
||||
connectionId,
|
||||
sql: input.sql,
|
||||
maxRows: input.maxRows,
|
||||
},
|
||||
{ runId: 'mcp-sql-execution' },
|
||||
);
|
||||
return {
|
||||
headers: result.headers,
|
||||
...(result.headerTypes ? { headerTypes: result.headerTypes } : {}),
|
||||
rows: result.rows,
|
||||
rowCount: result.rowCount ?? result.rows.length,
|
||||
};
|
||||
} finally {
|
||||
await cleanupConnector(connector);
|
||||
}
|
||||
}
|
||||
|
||||
export function createLocalProjectMcpContextPorts(
|
||||
project: KtxLocalProject,
|
||||
options: CreateLocalProjectMcpContextPortsOptions = {},
|
||||
|
|
@ -575,8 +632,31 @@ export function createLocalProjectMcpContextPorts(
|
|||
});
|
||||
},
|
||||
},
|
||||
entityDetails: {
|
||||
async read(input) {
|
||||
return createKtxEntityDetailsService(project).read(input);
|
||||
},
|
||||
},
|
||||
dictionarySearch: {
|
||||
async search(input) {
|
||||
return createKtxDictionarySearchService(project).search(input);
|
||||
},
|
||||
},
|
||||
discover: {
|
||||
async search(input) {
|
||||
return createKtxDiscoverDataService(project, { userId: 'local', embeddingService }).search(input);
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
if (options.sqlAnalysis && options.localScan?.createConnector) {
|
||||
ports.sqlExecution = {
|
||||
async execute(input) {
|
||||
return executeValidatedReadOnlySql(project, options, input);
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
if (options.localIngest) {
|
||||
ports.ingest = {
|
||||
async trigger(input) {
|
||||
|
|
|
|||
|
|
@ -6,11 +6,16 @@ import { createLocalProjectMemoryCapture } from '../memory/index.js';
|
|||
import { initKtxProject } from '../project/index.js';
|
||||
import { createKtxMcpServer } from './server.js';
|
||||
import type {
|
||||
KtxDiscoverDataMcpPort,
|
||||
KtxDictionarySearchMcpPort,
|
||||
KtxEntityDetailsMcpPort,
|
||||
KtxIngestMcpPort,
|
||||
KtxKnowledgeMcpPort,
|
||||
KtxMcpContextPorts,
|
||||
KtxScanMcpPort,
|
||||
KtxSemanticLayerMcpPort,
|
||||
KtxSqlExecutionMcpPort,
|
||||
KtxSqlExecutionResponse,
|
||||
MemoryCapturePort,
|
||||
} from './types.js';
|
||||
|
||||
|
|
@ -64,6 +69,242 @@ describe('createKtxMcpServer', () => {
|
|||
});
|
||||
});
|
||||
|
||||
it('registers parser-gated sql_execution when the host provides a SQL execution port', async () => {
|
||||
const fake = makeFakeServer();
|
||||
const response: KtxSqlExecutionResponse = {
|
||||
headers: ['status', 'count'],
|
||||
headerTypes: ['text', 'bigint'],
|
||||
rows: [['paid', 42]],
|
||||
rowCount: 1,
|
||||
};
|
||||
const sqlExecution: KtxSqlExecutionMcpPort = {
|
||||
execute: vi.fn<KtxSqlExecutionMcpPort['execute']>().mockResolvedValue(response),
|
||||
};
|
||||
|
||||
createKtxMcpServer({
|
||||
server: fake.server,
|
||||
userContext: { userId: 'local-user' },
|
||||
contextTools: {
|
||||
sqlExecution,
|
||||
},
|
||||
});
|
||||
|
||||
expect(fake.tools.map((tool) => tool.name)).toEqual(['sql_execution']);
|
||||
await expect(
|
||||
getTool(fake.tools, 'sql_execution').handler({
|
||||
connectionId: 'warehouse',
|
||||
sql: 'select status, count(*) from public.orders group by status',
|
||||
maxRows: 50,
|
||||
}),
|
||||
).resolves.toEqual({
|
||||
content: [
|
||||
{
|
||||
type: 'text',
|
||||
text: JSON.stringify(
|
||||
{
|
||||
headers: ['status', 'count'],
|
||||
headerTypes: ['text', 'bigint'],
|
||||
rows: [['paid', 42]],
|
||||
rowCount: 1,
|
||||
},
|
||||
null,
|
||||
2,
|
||||
),
|
||||
},
|
||||
],
|
||||
structuredContent: {
|
||||
headers: ['status', 'count'],
|
||||
headerTypes: ['text', 'bigint'],
|
||||
rows: [['paid', 42]],
|
||||
rowCount: 1,
|
||||
},
|
||||
});
|
||||
expect(sqlExecution.execute).toHaveBeenCalledWith({
|
||||
connectionId: 'warehouse',
|
||||
sql: 'select status, count(*) from public.orders group by status',
|
||||
maxRows: 50,
|
||||
});
|
||||
});
|
||||
|
||||
it('registers entity_details when the host provides an entity-details port', async () => {
|
||||
const fake = makeFakeServer();
|
||||
const entityDetails: KtxEntityDetailsMcpPort = {
|
||||
read: vi.fn<KtxEntityDetailsMcpPort['read']>().mockResolvedValue({
|
||||
results: [
|
||||
{
|
||||
ok: true,
|
||||
connectionId: 'warehouse',
|
||||
tableRef: { catalog: null, db: 'public', name: 'orders' },
|
||||
display: 'public.orders',
|
||||
kind: 'table',
|
||||
comment: 'Customer orders',
|
||||
estimatedRows: 12,
|
||||
columns: [
|
||||
{
|
||||
name: 'id',
|
||||
nativeType: 'integer',
|
||||
normalizedType: 'integer',
|
||||
dimensionType: 'number',
|
||||
nullable: false,
|
||||
primaryKey: true,
|
||||
comment: null,
|
||||
},
|
||||
],
|
||||
foreignKeys: [],
|
||||
snapshot: {
|
||||
syncId: 'sync-1',
|
||||
extractedAt: '2026-05-14T09:00:00.000Z',
|
||||
scanRunId: 'scan-1',
|
||||
},
|
||||
},
|
||||
],
|
||||
}),
|
||||
};
|
||||
|
||||
createKtxMcpServer({
|
||||
server: fake.server,
|
||||
userContext: { userId: 'local-user' },
|
||||
contextTools: { entityDetails },
|
||||
});
|
||||
|
||||
expect(fake.tools.map((tool) => tool.name)).toEqual(['entity_details']);
|
||||
await expect(
|
||||
getTool(fake.tools, 'entity_details').handler({
|
||||
connectionId: 'warehouse',
|
||||
entities: [{ table: 'public.orders', columns: ['id'] }],
|
||||
}),
|
||||
).resolves.toMatchObject({
|
||||
structuredContent: {
|
||||
results: [
|
||||
{
|
||||
ok: true,
|
||||
connectionId: 'warehouse',
|
||||
display: 'public.orders',
|
||||
columns: [{ name: 'id' }],
|
||||
},
|
||||
],
|
||||
},
|
||||
});
|
||||
expect(entityDetails.read).toHaveBeenCalledWith({
|
||||
connectionId: 'warehouse',
|
||||
entities: [{ table: 'public.orders', columns: ['id'] }],
|
||||
});
|
||||
});
|
||||
|
||||
it('registers dictionary_search when the host provides a dictionary-search port', async () => {
|
||||
const fake = makeFakeServer();
|
||||
const dictionarySearch: KtxDictionarySearchMcpPort = {
|
||||
search: vi.fn<KtxDictionarySearchMcpPort['search']>().mockResolvedValue({
|
||||
searched: [
|
||||
{
|
||||
connectionId: 'warehouse',
|
||||
coverage: {
|
||||
sampledRows: null,
|
||||
valuesPerColumn: null,
|
||||
profiledColumns: 1,
|
||||
syncId: 'sync-1',
|
||||
profiledAt: null,
|
||||
},
|
||||
status: 'ready',
|
||||
},
|
||||
],
|
||||
results: [
|
||||
{
|
||||
value: 'paid',
|
||||
matches: [
|
||||
{
|
||||
connectionId: 'warehouse',
|
||||
sourceName: 'orders',
|
||||
columnName: 'status',
|
||||
matchedValue: 'paid',
|
||||
cardinality: 3,
|
||||
},
|
||||
],
|
||||
misses: [],
|
||||
},
|
||||
],
|
||||
}),
|
||||
};
|
||||
|
||||
createKtxMcpServer({
|
||||
server: fake.server,
|
||||
userContext: { userId: 'local-user' },
|
||||
contextTools: { dictionarySearch },
|
||||
});
|
||||
|
||||
expect(fake.tools.map((tool) => tool.name)).toEqual(['dictionary_search']);
|
||||
await expect(
|
||||
getTool(fake.tools, 'dictionary_search').handler({
|
||||
connectionId: 'warehouse',
|
||||
values: ['paid'],
|
||||
}),
|
||||
).resolves.toMatchObject({
|
||||
structuredContent: {
|
||||
searched: [{ connectionId: 'warehouse', status: 'ready' }],
|
||||
results: [
|
||||
{
|
||||
value: 'paid',
|
||||
matches: [{ connectionId: 'warehouse', sourceName: 'orders', columnName: 'status' }],
|
||||
misses: [],
|
||||
},
|
||||
],
|
||||
},
|
||||
});
|
||||
expect(dictionarySearch.search).toHaveBeenCalledWith({
|
||||
connectionId: 'warehouse',
|
||||
values: ['paid'],
|
||||
});
|
||||
});
|
||||
|
||||
it('registers discover_data when the host provides a discover port', async () => {
|
||||
const fake = makeFakeServer();
|
||||
const discover: KtxDiscoverDataMcpPort = {
|
||||
search: vi.fn<KtxDiscoverDataMcpPort['search']>().mockResolvedValue([
|
||||
{
|
||||
kind: 'table',
|
||||
id: 'public.orders',
|
||||
score: 1,
|
||||
summary: 'Orders table',
|
||||
snippet: 'id, status',
|
||||
matchedOn: 'name',
|
||||
connectionId: 'warehouse',
|
||||
tableRef: { catalog: null, db: 'public', name: 'orders' },
|
||||
},
|
||||
]),
|
||||
};
|
||||
|
||||
createKtxMcpServer({
|
||||
server: fake.server,
|
||||
userContext: { userId: 'local-user' },
|
||||
contextTools: { discover },
|
||||
});
|
||||
|
||||
expect(fake.tools.map((tool) => tool.name)).toEqual(['discover_data']);
|
||||
await expect(
|
||||
getTool(fake.tools, 'discover_data').handler({
|
||||
query: 'orders',
|
||||
connectionId: 'warehouse',
|
||||
kinds: ['table'],
|
||||
limit: 5,
|
||||
}),
|
||||
).resolves.toMatchObject({
|
||||
structuredContent: [
|
||||
{
|
||||
kind: 'table',
|
||||
id: 'public.orders',
|
||||
connectionId: 'warehouse',
|
||||
tableRef: { catalog: null, db: 'public', name: 'orders' },
|
||||
},
|
||||
],
|
||||
});
|
||||
expect(discover.search).toHaveBeenCalledWith({
|
||||
query: 'orders',
|
||||
connectionId: 'warehouse',
|
||||
kinds: ['table'],
|
||||
limit: 5,
|
||||
});
|
||||
});
|
||||
|
||||
it('registers memory capture tools without host app dependencies', async () => {
|
||||
const fake = makeFakeServer();
|
||||
const capture: MemoryCapturePort = {
|
||||
|
|
|
|||
|
|
@ -1,7 +1,11 @@
|
|||
import type { IngestReportSnapshot, MemoryFlowReplayInput, TableUsageOutput } from '../ingest/index.js';
|
||||
import type { MemoryCaptureService } from '../memory/index.js';
|
||||
import type { KtxEntityDetailsInput, KtxEntityDetailsResponse } from '../scan/entity-details.js';
|
||||
import type { KtxScanMode, KtxScanReport } from '../scan/index.js';
|
||||
import type { KtxDiscoverDataInput, KtxDiscoverDataResponse } from '../search/index.js';
|
||||
import type {
|
||||
KtxDictionarySearchInput,
|
||||
KtxDictionarySearchResponse,
|
||||
SemanticLayerQueryInput,
|
||||
SlDictionaryMatch,
|
||||
SlSearchLaneSummary,
|
||||
|
|
@ -312,10 +316,37 @@ export interface KtxScanMcpPort {
|
|||
readArtifact?(input: { runId: string; path: string }): Promise<KtxScanArtifactReadResponse | null>;
|
||||
}
|
||||
|
||||
export interface KtxEntityDetailsMcpPort {
|
||||
read(input: KtxEntityDetailsInput): Promise<KtxEntityDetailsResponse>;
|
||||
}
|
||||
|
||||
export interface KtxDictionarySearchMcpPort {
|
||||
search(input: KtxDictionarySearchInput): Promise<KtxDictionarySearchResponse>;
|
||||
}
|
||||
|
||||
export interface KtxDiscoverDataMcpPort {
|
||||
search(input: KtxDiscoverDataInput): Promise<KtxDiscoverDataResponse>;
|
||||
}
|
||||
|
||||
export interface KtxSqlExecutionResponse {
|
||||
headers: string[];
|
||||
headerTypes?: string[];
|
||||
rows: unknown[][];
|
||||
rowCount: number;
|
||||
}
|
||||
|
||||
export interface KtxSqlExecutionMcpPort {
|
||||
execute(input: { connectionId: string; sql: string; maxRows: number }): Promise<KtxSqlExecutionResponse>;
|
||||
}
|
||||
|
||||
export interface KtxMcpContextPorts {
|
||||
connections?: KtxConnectionsMcpPort;
|
||||
knowledge?: KtxKnowledgeMcpPort;
|
||||
semanticLayer?: KtxSemanticLayerMcpPort;
|
||||
entityDetails?: KtxEntityDetailsMcpPort;
|
||||
dictionarySearch?: KtxDictionarySearchMcpPort;
|
||||
discover?: KtxDiscoverDataMcpPort;
|
||||
sqlExecution?: KtxSqlExecutionMcpPort;
|
||||
ingest?: KtxIngestMcpPort;
|
||||
scan?: KtxScanMcpPort;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue