mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-19 08:28:06 +02:00
feat(mcp): added MCP server (#97)
* docs(specs): design research-agent MCP tools and ktx mcp daemon Adds the 2026-05-14 design spec for exposing four new MCP tools (discover_data, entity_details, dictionary_search, sql_execution), shipping a ktx-research skill, and introducing an HTTP-only ktx mcp daemon so external agents can use KTX as a research-capable context layer. * Refine research-agent MCP tools spec after adversarial review iteration 1 * Refine research-agent MCP tools spec after adversarial review iteration 2 * Refine research-agent MCP tools spec after adversarial review iteration 3 * Refine spec: drop connectionName compat carve-out and ground summary/snippet provenance per kind * feat(daemon): validate read-only SQL with sqlglot * feat(context): expose read-only SQL validation port * feat(context): register MCP sql execution tool * feat(context): execute MCP SQL through validated connector path * test(context): update SQL analysis port fixtures * docs: add research-agent MCP sql execution foundation plan * feat(context): add scan-backed entity details service * feat(context): register MCP entity details tool * feat(context): expose local MCP entity details * test(context): align entity details scan fixtures * docs: add research-agent MCP entity_details plan * feat(context): add dictionary search service * feat(context): register MCP dictionary search tool * feat(context): expose local MCP dictionary search * docs: add research-agent MCP dictionary_search plan * feat: add MCP discover data service * feat: expose discover data MCP tool * feat: wire local discover data MCP port * docs: add research-agent MCP discover_data plan * feat(cli): add mcp http security helpers * feat(cli): host mcp over streamable http * feat(cli): manage mcp daemon lifecycle * feat(cli): add ktx mcp commands * fix(cli): stabilize mcp daemon verification * docs: add research-agent MCP http daemon plan * feat(cli): install KTX research skill * feat(cli): configure MCP clients in setup agents * feat(cli): support 
Claude local MCP setup scope * docs: add research-agent MCP setup-agents plan * refactor(context): use connectionId in warehouse verification tools * docs(context): update ingest verification prompts for connectionId * docs: add research-agent MCP ingest contract convergence plan * chore: build runtime artifacts in conductor setup --------- Co-authored-by: Andrey Avtomonov <7889985+andreybavt@users.noreply.github.com>
This commit is contained in:
parent
c7b64379bf
commit
b759a4a286
78 changed files with 13689 additions and 190 deletions
|
|
@ -18,6 +18,9 @@ const sqlAnalysis: SqlAnalysisPort = {
|
|||
async analyzeBatch() {
|
||||
return new Map();
|
||||
},
|
||||
async validateReadOnly() {
|
||||
return { ok: true };
|
||||
},
|
||||
};
|
||||
|
||||
const reader: HistoricSqlReader = {
|
||||
|
|
@ -79,6 +82,9 @@ describe('HistoricSqlSourceAdapter', () => {
|
|||
],
|
||||
]);
|
||||
},
|
||||
async validateReadOnly() {
|
||||
return { ok: true };
|
||||
},
|
||||
};
|
||||
const adapter = new HistoricSqlSourceAdapter({
|
||||
sqlAnalysis: batchSqlAnalysis,
|
||||
|
|
|
|||
|
|
@ -159,6 +159,7 @@ function acceptanceSqlAnalysis(): SqlAnalysisPort {
|
|||
);
|
||||
},
|
||||
),
|
||||
validateReadOnly: vi.fn(async () => ({ ok: true })),
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -83,6 +83,7 @@ describe('stageHistoricSqlAggregatedSnapshot', () => {
|
|||
],
|
||||
['bad-parse', { tablesTouched: [], columnsByClause: {}, error: 'parse failed' }],
|
||||
])),
|
||||
validateReadOnly: vi.fn(async () => ({ ok: true })),
|
||||
};
|
||||
|
||||
await stageHistoricSqlAggregatedSnapshot({
|
||||
|
|
@ -207,6 +208,7 @@ describe('stageHistoricSqlAggregatedSnapshot', () => {
|
|||
},
|
||||
],
|
||||
])),
|
||||
validateReadOnly: vi.fn(async () => ({ ok: true })),
|
||||
};
|
||||
|
||||
await stageHistoricSqlAggregatedSnapshot({
|
||||
|
|
@ -283,6 +285,7 @@ describe('stageHistoricSqlAggregatedSnapshot', () => {
|
|||
},
|
||||
],
|
||||
])),
|
||||
validateReadOnly: vi.fn(async () => ({ ok: true })),
|
||||
};
|
||||
|
||||
await stageHistoricSqlAggregatedSnapshot({
|
||||
|
|
@ -403,6 +406,7 @@ describe('stageHistoricSqlAggregatedSnapshot', () => {
|
|||
},
|
||||
],
|
||||
])),
|
||||
validateReadOnly: vi.fn(async () => ({ ok: true })),
|
||||
};
|
||||
|
||||
await stageHistoricSqlAggregatedSnapshot({
|
||||
|
|
|
|||
|
|
@ -94,11 +94,15 @@ describe('ingest runtime assets', () => {
|
|||
|
||||
it('packages identifier verification prompt assets', async () => {
|
||||
const shared = await readFile(join(skillsDir, '_shared', 'identifier-verification.md'), 'utf-8');
|
||||
const legacyConnectionPrefix = ['connection', 'Name'].join('');
|
||||
|
||||
expect(shared).toContain('## Identifier Verification Protocol');
|
||||
expect(shared).toContain('discover_data');
|
||||
expect(shared).toContain('entity_details');
|
||||
expect(shared).toContain('sql_execution');
|
||||
expect(shared).toContain('sql_execution({connectionName, sql: "SELECT DISTINCT');
|
||||
expect(shared).toContain('sql_execution({connectionName, sql: "SELECT 1 FROM');
|
||||
expect(shared).toContain('sql_execution({connectionId, sql: "SELECT DISTINCT');
|
||||
expect(shared).toContain('sql_execution({connectionId, sql: "SELECT 1 FROM');
|
||||
expect(shared).not.toContain(`entity_details({${legacyConnectionPrefix}`);
|
||||
expect(shared).not.toContain(`sql_execution({${legacyConnectionPrefix}`);
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -97,6 +97,9 @@ describe('local ingest adapters', () => {
|
|||
async analyzeBatch() {
|
||||
return new Map();
|
||||
},
|
||||
async validateReadOnly() {
|
||||
return { ok: true };
|
||||
},
|
||||
};
|
||||
const adapters = createDefaultLocalIngestAdapters(project, {
|
||||
historicSql: {
|
||||
|
|
@ -140,6 +143,9 @@ describe('local ingest adapters', () => {
|
|||
async analyzeBatch() {
|
||||
return new Map();
|
||||
},
|
||||
async validateReadOnly() {
|
||||
return { ok: true };
|
||||
},
|
||||
},
|
||||
reader,
|
||||
queryClient,
|
||||
|
|
@ -166,6 +172,9 @@ describe('local ingest adapters', () => {
|
|||
async analyzeBatch() {
|
||||
return new Map();
|
||||
},
|
||||
async validateReadOnly() {
|
||||
return { ok: true };
|
||||
},
|
||||
},
|
||||
postgresQueryClient: {
|
||||
async executeQuery() {
|
||||
|
|
@ -258,6 +267,9 @@ describe('local ingest adapters', () => {
|
|||
async analyzeBatch() {
|
||||
return new Map();
|
||||
},
|
||||
async validateReadOnly() {
|
||||
return { ok: true };
|
||||
},
|
||||
},
|
||||
postgresQueryClient: {
|
||||
async executeQuery() {
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
import { beforeEach, describe, expect, it, vi } from 'vitest';
|
||||
import type { WarehouseCatalogService } from '../../../scan/warehouse-catalog.js';
|
||||
import type { BaseTool, ToolContext } from '../../../tools/index.js';
|
||||
import { DiscoverDataTool } from './discover-data.tool.js';
|
||||
import type { WarehouseCatalogService } from './warehouse-catalog.service.js';
|
||||
|
||||
describe('DiscoverDataTool', () => {
|
||||
const wikiSearchTool = { call: vi.fn() } as unknown as BaseTool & { call: ReturnType<typeof vi.fn> };
|
||||
|
|
@ -36,7 +36,7 @@ describe('DiscoverDataTool', () => {
|
|||
catalog.searchByName.mockResolvedValue([
|
||||
{
|
||||
kind: 'table',
|
||||
connectionName: 'warehouse',
|
||||
connectionId: 'warehouse',
|
||||
ref: { catalog: null, db: 'public', name: 'orders' },
|
||||
display: 'public.orders',
|
||||
matchedOn: 'name',
|
||||
|
|
@ -45,28 +45,28 @@ describe('DiscoverDataTool', () => {
|
|||
});
|
||||
|
||||
it('groups wiki, semantic layer, and raw schema hits with routing hints', async () => {
|
||||
const result = await tool.call({ query: 'orders', connectionName: 'warehouse', limit: 5 }, context);
|
||||
const result = await tool.call({ query: 'orders', connectionId: 'warehouse', limit: 5 }, context);
|
||||
|
||||
expect(result.markdown).toContain('## Wiki Pages');
|
||||
expect(result.markdown).toContain('use `wiki_read(blockKey)` for full content');
|
||||
expect(result.markdown).toContain('## Semantic Layer Sources');
|
||||
expect(result.markdown).toContain('use `sl_read_source(sourceName)` for the YAML');
|
||||
expect(result.markdown).toContain('## Raw Warehouse Schema');
|
||||
expect(result.markdown).toContain('use `entity_details({connectionName, targets: [{display}]})`');
|
||||
expect(result.markdown).toContain('use `entity_details({connectionId, targets: [{display}]})`');
|
||||
expect(result.structured.raw?.hits).toHaveLength(1);
|
||||
});
|
||||
|
||||
it('includes connectionName on raw schema hits so entity_details can follow up', async () => {
|
||||
it('includes connectionId on raw schema hits so entity_details can follow up', async () => {
|
||||
const multiConnectionContext: ToolContext = {
|
||||
...context,
|
||||
session: { allowedConnectionNames: new Set(['warehouse', 'analytics']) } as any,
|
||||
};
|
||||
catalog.searchByName.mockImplementation(async (connectionName: string, query: string) => [
|
||||
catalog.searchByName.mockImplementation(async (connectionId: string, query: string) => [
|
||||
{
|
||||
kind: 'table',
|
||||
connectionName,
|
||||
ref: { catalog: null, db: 'public', name: `${connectionName}_${query}` },
|
||||
display: `public.${connectionName}_${query}`,
|
||||
connectionId,
|
||||
ref: { catalog: null, db: 'public', name: `${connectionId}_${query}` },
|
||||
display: `public.${connectionId}_${query}`,
|
||||
matchedOn: 'name',
|
||||
},
|
||||
]);
|
||||
|
|
@ -75,16 +75,16 @@ describe('DiscoverDataTool', () => {
|
|||
|
||||
expect(catalog.searchByName).toHaveBeenCalledWith('analytics', 'orders', 10);
|
||||
expect(catalog.searchByName).toHaveBeenCalledWith('warehouse', 'orders', 10);
|
||||
expect(result.markdown).toContain('connectionName=analytics');
|
||||
expect(result.markdown).toContain('connectionName=warehouse');
|
||||
expect(result.markdown).toContain('connectionId=analytics');
|
||||
expect(result.markdown).toContain('connectionId=warehouse');
|
||||
expect(result.markdown).toContain(
|
||||
'entity_details({connectionName: "analytics", targets: [{display: "public.analytics_orders"}]})',
|
||||
'entity_details({connectionId: "analytics", targets: [{display: "public.analytics_orders"}]})',
|
||||
);
|
||||
expect(result.structured.raw?.hits.map((hit) => hit.connectionName)).toEqual(['analytics', 'warehouse']);
|
||||
expect(result.structured.raw?.hits.map((hit) => hit.connectionId)).toEqual(['analytics', 'warehouse']);
|
||||
});
|
||||
|
||||
it('refuses explicit out-of-scope connection names', async () => {
|
||||
const result = await tool.call({ query: 'orders', connectionName: 'billing' }, context);
|
||||
const result = await tool.call({ query: 'orders', connectionId: 'billing' }, context);
|
||||
|
||||
expect(result.markdown).toContain('Connection "billing" is not available to this ingest stage.');
|
||||
expect(result.structured).toEqual({ wiki: null, sl: null, raw: null });
|
||||
|
|
@ -99,7 +99,7 @@ describe('DiscoverDataTool', () => {
|
|||
structured: { sourceName: 'orders' },
|
||||
});
|
||||
|
||||
const result = await tool.call({ sourceName: 'orders', connectionName: 'warehouse' }, context);
|
||||
const result = await tool.call({ sourceName: 'orders', connectionId: 'warehouse' }, context);
|
||||
|
||||
expect(slDiscoverTool.call).toHaveBeenCalledWith({ sourceName: 'orders', connectionId: 'warehouse' }, context);
|
||||
expect(wikiSearchTool.call).not.toHaveBeenCalled();
|
||||
|
|
@ -112,8 +112,20 @@ describe('DiscoverDataTool', () => {
|
|||
slDiscoverTool.call.mockResolvedValueOnce({ markdown: '', structured: { totalSources: 0, sources: [] } });
|
||||
catalog.searchByName.mockResolvedValueOnce([]);
|
||||
|
||||
const result = await tool.call({ query: 'customer source', connectionName: 'warehouse' }, context);
|
||||
const result = await tool.call({ query: 'customer source', connectionId: 'warehouse' }, context);
|
||||
|
||||
expect(result.markdown).toContain('No matches for "customer source" across wiki, semantic layer, or raw warehouse schema.');
|
||||
});
|
||||
|
||||
it('uses connectionId as the optional connection filter', () => {
|
||||
const legacyConnectionField = ['connection', 'Name'].join('');
|
||||
|
||||
expect(tool.parseInput({ query: 'orders', connectionId: 'warehouse', limit: 5 })).toEqual({
|
||||
query: 'orders',
|
||||
connectionId: 'warehouse',
|
||||
limit: 5,
|
||||
});
|
||||
|
||||
expect(() => tool.parseInput({ query: 'orders', [legacyConnectionField]: 'warehouse', limit: 5 })).toThrow();
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -1,13 +1,13 @@
|
|||
import { z } from 'zod';
|
||||
import { WarehouseCatalogService, type RawSchemaHit } from '../../../scan/warehouse-catalog.js';
|
||||
import { BaseTool, type ToolContext, type ToolOutput } from '../../../tools/index.js';
|
||||
import { WarehouseCatalogService, type RawSchemaHit } from './warehouse-catalog.service.js';
|
||||
|
||||
const discoverDataInputSchema = z.object({
|
||||
query: z.string().optional(),
|
||||
connectionName: z.string().regex(/^[a-zA-Z0-9][a-zA-Z0-9_-]*$/).optional(),
|
||||
connectionId: z.string().regex(/^[a-zA-Z0-9][a-zA-Z0-9_-]*$/).optional(),
|
||||
limit: z.number().int().positive().max(50).optional().default(10),
|
||||
sourceName: z.string().optional(),
|
||||
});
|
||||
}).strict();
|
||||
|
||||
type DiscoverDataInput = z.input<typeof discoverDataInputSchema>;
|
||||
|
||||
|
|
@ -62,16 +62,16 @@ export class DiscoverDataTool extends BaseTool<typeof discoverDataInputSchema> {
|
|||
|
||||
async call(input: DiscoverDataInput, context: ToolContext): Promise<ToolOutput<DiscoverDataStructured>> {
|
||||
const allowed = allowedConnectionNames(context);
|
||||
if (input.connectionName && allowed && !allowed.has(input.connectionName)) {
|
||||
if (input.connectionId && allowed && !allowed.has(input.connectionId)) {
|
||||
return {
|
||||
markdown: `Connection "${input.connectionName}" is not available to this ingest stage.`,
|
||||
markdown: `Connection "${input.connectionId}" is not available to this ingest stage.`,
|
||||
structured: { wiki: null, sl: null, raw: null },
|
||||
};
|
||||
}
|
||||
|
||||
if (input.sourceName) {
|
||||
const sl = await this.deps.slDiscoverTool.call(
|
||||
{ sourceName: input.sourceName, connectionId: input.connectionName },
|
||||
{ sourceName: input.sourceName, connectionId: input.connectionId },
|
||||
context,
|
||||
);
|
||||
return { markdown: sl.markdown, structured: { wiki: null, sl: sl.structured, raw: null } };
|
||||
|
|
@ -93,7 +93,7 @@ export class DiscoverDataTool extends BaseTool<typeof discoverDataInputSchema> {
|
|||
}
|
||||
|
||||
const slResult = await this.deps.slDiscoverTool.call(
|
||||
{ query: query || undefined, connectionId: input.connectionName },
|
||||
{ query: query || undefined, connectionId: input.connectionId },
|
||||
context,
|
||||
);
|
||||
if (totalSources(slResult.structured) > 0) {
|
||||
|
|
@ -107,23 +107,23 @@ export class DiscoverDataTool extends BaseTool<typeof discoverDataInputSchema> {
|
|||
}
|
||||
|
||||
const catalog = this.deps.catalogFactory(context);
|
||||
const connections = input.connectionName ? [input.connectionName] : [...(allowed ?? [])].sort();
|
||||
const connections = input.connectionId ? [input.connectionId] : [...(allowed ?? [])].sort();
|
||||
const rawHits: RawSchemaHit[] = [];
|
||||
for (const connectionName of connections) {
|
||||
rawHits.push(...(await catalog.searchByName(connectionName, query, limit)));
|
||||
for (const connectionId of connections) {
|
||||
rawHits.push(...(await catalog.searchByName(connectionId, query, limit)));
|
||||
}
|
||||
if (rawHits.length > 0) {
|
||||
parts.push(
|
||||
'## Raw Warehouse Schema',
|
||||
'> use `entity_details({connectionName, targets: [{display}]})` for full DDL + sample values',
|
||||
'> use `entity_details({connectionId, targets: [{display}]})` for full DDL + sample values',
|
||||
);
|
||||
parts.push(
|
||||
rawHits
|
||||
.slice(0, limit)
|
||||
.map(
|
||||
(hit) =>
|
||||
`- ${hit.kind}: ${hit.display} [connectionName=${hit.connectionName}] (matched on ${hit.matchedOn}) - ` +
|
||||
`follow up with \`entity_details({connectionName: "${hit.connectionName}", targets: [{display: "${hit.display}"}]})\``,
|
||||
`- ${hit.kind}: ${hit.display} [connectionId=${hit.connectionId}] (matched on ${hit.matchedOn}) - ` +
|
||||
`follow up with \`entity_details({connectionId: "${hit.connectionId}", targets: [{display: "${hit.display}"}]})\``,
|
||||
)
|
||||
.join('\n'),
|
||||
);
|
||||
|
|
|
|||
|
|
@ -3,9 +3,9 @@ import { tmpdir } from 'node:os';
|
|||
import { join } from 'node:path';
|
||||
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
||||
import { initKtxProject, type KtxLocalProject } from '../../../project/index.js';
|
||||
import { WarehouseCatalogService } from '../../../scan/warehouse-catalog.js';
|
||||
import type { ToolContext } from '../../../tools/index.js';
|
||||
import { EntityDetailsTool } from './entity-details.tool.js';
|
||||
import { WarehouseCatalogService } from './warehouse-catalog.service.js';
|
||||
|
||||
describe('EntityDetailsTool', () => {
|
||||
let tempDir: string;
|
||||
|
|
@ -32,11 +32,11 @@ describe('EntityDetailsTool', () => {
|
|||
await rm(tempDir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
async function seedLiveDatabaseScan(connectionName = 'warehouse', syncId = 'sync-1') {
|
||||
const root = `raw-sources/${connectionName}/live-database/${syncId}`;
|
||||
async function seedLiveDatabaseScan(connectionId = 'warehouse', syncId = 'sync-1') {
|
||||
const root = `raw-sources/${connectionId}/live-database/${syncId}`;
|
||||
await project.fileStore.writeFile(
|
||||
`${root}/connection.json`,
|
||||
JSON.stringify({ connectionId: connectionName, driver: 'postgres', extractedAt: '2026-05-12T00:00:00.000Z' }, null, 2),
|
||||
JSON.stringify({ connectionId, driver: 'postgres', extractedAt: '2026-05-12T00:00:00.000Z' }, null, 2),
|
||||
'ktx',
|
||||
'ktx@example.com',
|
||||
'seed connection',
|
||||
|
|
@ -84,7 +84,7 @@ describe('EntityDetailsTool', () => {
|
|||
`${root}/enrichment/relationship-profile.json`,
|
||||
JSON.stringify(
|
||||
{
|
||||
connectionId: connectionName,
|
||||
connectionId,
|
||||
driver: 'postgres',
|
||||
tables: [{ table: { catalog: null, db: 'public', name: 'orders' }, rowCount: 12 }],
|
||||
columns: {
|
||||
|
|
@ -109,7 +109,7 @@ describe('EntityDetailsTool', () => {
|
|||
}
|
||||
|
||||
it('returns scoped table detail for a display target', async () => {
|
||||
const result = await tool.call({ connectionName: 'warehouse', targets: [{ display: 'public.orders' }] }, context);
|
||||
const result = await tool.call({ connectionId: 'warehouse', targets: [{ display: 'public.orders' }] }, context);
|
||||
|
||||
expect(result.markdown).toContain('### public.orders');
|
||||
expect(result.markdown).toContain('- status (text, nullable=false)');
|
||||
|
|
@ -120,7 +120,7 @@ describe('EntityDetailsTool', () => {
|
|||
|
||||
it('resolves display targets that include a column name', async () => {
|
||||
const result = await tool.call(
|
||||
{ connectionName: 'warehouse', targets: [{ display: 'public.orders.status' }] },
|
||||
{ connectionId: 'warehouse', targets: [{ display: 'public.orders.status' }] },
|
||||
context,
|
||||
);
|
||||
|
||||
|
|
@ -133,7 +133,7 @@ describe('EntityDetailsTool', () => {
|
|||
|
||||
it('reports missing explicit columns instead of returning an empty column list', async () => {
|
||||
const result = await tool.call(
|
||||
{ connectionName: 'warehouse', targets: [{ display: 'public.orders.plan_tier' }] },
|
||||
{ connectionId: 'warehouse', targets: [{ display: 'public.orders.plan_tier' }] },
|
||||
context,
|
||||
);
|
||||
|
||||
|
|
@ -146,7 +146,7 @@ describe('EntityDetailsTool', () => {
|
|||
it('reports missing structured table targets in model-visible markdown', async () => {
|
||||
const result = await tool.call(
|
||||
{
|
||||
connectionName: 'warehouse',
|
||||
connectionId: 'warehouse',
|
||||
targets: [{ catalog: null, db: 'public', name: 'orderz' }],
|
||||
},
|
||||
context,
|
||||
|
|
@ -161,7 +161,7 @@ describe('EntityDetailsTool', () => {
|
|||
it('reports missing structured column targets in model-visible markdown', async () => {
|
||||
const result = await tool.call(
|
||||
{
|
||||
connectionName: 'warehouse',
|
||||
connectionId: 'warehouse',
|
||||
targets: [{ catalog: null, db: 'public', name: 'orders', column: 'plan_tier' }],
|
||||
},
|
||||
context,
|
||||
|
|
@ -175,7 +175,7 @@ describe('EntityDetailsTool', () => {
|
|||
|
||||
it('returns a no-scan state distinct from not found', async () => {
|
||||
const result = await tool.call(
|
||||
{ connectionName: 'empty', targets: [{ display: 'public.orders' }] },
|
||||
{ connectionId: 'empty', targets: [{ display: 'public.orders' }] },
|
||||
{ ...context, session: { ...context.session!, allowedConnectionNames: new Set(['empty']) } },
|
||||
);
|
||||
|
||||
|
|
@ -184,9 +184,30 @@ describe('EntityDetailsTool', () => {
|
|||
});
|
||||
|
||||
it('refuses out-of-scope connections', async () => {
|
||||
const result = await tool.call({ connectionName: 'billing', targets: [{ display: 'public.orders' }] }, context);
|
||||
const result = await tool.call({ connectionId: 'billing', targets: [{ display: 'public.orders' }] }, context);
|
||||
|
||||
expect(result.markdown).toContain('Connection "billing" is not available to this ingest stage.');
|
||||
expect(result.structured.scanAvailable).toBe(false);
|
||||
});
|
||||
|
||||
it('uses connectionId as the public input field', async () => {
|
||||
const legacyConnectionField = ['connection', 'Name'].join('');
|
||||
|
||||
expect(
|
||||
tool.parseInput({
|
||||
connectionId: 'warehouse',
|
||||
targets: [{ display: 'public.orders' }],
|
||||
}),
|
||||
).toEqual({
|
||||
connectionId: 'warehouse',
|
||||
targets: [{ display: 'public.orders' }],
|
||||
});
|
||||
|
||||
expect(() =>
|
||||
tool.parseInput({
|
||||
[legacyConnectionField]: 'warehouse',
|
||||
targets: [{ display: 'public.orders' }],
|
||||
}),
|
||||
).toThrow();
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
import { z } from 'zod';
|
||||
import type { KtxTableRef } from '../../../scan/types.js';
|
||||
import { WarehouseCatalogService, type TableDetail } from '../../../scan/warehouse-catalog.js';
|
||||
import { BaseTool, type ToolContext, type ToolOutput } from '../../../tools/index.js';
|
||||
import { WarehouseCatalogService, type TableDetail } from './warehouse-catalog.service.js';
|
||||
|
||||
const targetSchema = z.union([
|
||||
z.object({ display: z.string().min(1) }),
|
||||
|
|
@ -14,9 +14,9 @@ const targetSchema = z.union([
|
|||
]);
|
||||
|
||||
const entityDetailsInputSchema = z.object({
|
||||
connectionName: z.string().regex(/^[a-zA-Z0-9][a-zA-Z0-9_-]*$/),
|
||||
connectionId: z.string().regex(/^[a-zA-Z0-9][a-zA-Z0-9_-]*$/),
|
||||
targets: z.array(targetSchema).min(1).max(50),
|
||||
});
|
||||
}).strict();
|
||||
|
||||
type EntityDetailsInput = z.infer<typeof entityDetailsInputSchema>;
|
||||
type EntityDetailsTarget = EntityDetailsInput['targets'][number];
|
||||
|
|
@ -47,14 +47,14 @@ function appendMissingTargetMarkdown(parts: string[], target: EntityDetailsTarge
|
|||
|
||||
async function resolveTarget(
|
||||
catalog: WarehouseCatalogService,
|
||||
connectionName: string,
|
||||
connectionId: string,
|
||||
target: EntityDetailsTarget,
|
||||
): Promise<{ resolved: (KtxTableRef & { column?: string }) | null; candidates: KtxTableRef[] }> {
|
||||
if ('display' in target) {
|
||||
return catalog.resolveDisplayTarget(connectionName, target.display);
|
||||
return catalog.resolveDisplayTarget(connectionId, target.display);
|
||||
}
|
||||
|
||||
const candidateResolution = await catalog.resolveDisplayTarget(connectionName, targetLabel(target));
|
||||
const candidateResolution = await catalog.resolveDisplayTarget(connectionId, targetLabel(target));
|
||||
return {
|
||||
resolved: {
|
||||
catalog: target.catalog,
|
||||
|
|
@ -107,18 +107,18 @@ export class EntityDetailsTool extends BaseTool<typeof entityDetailsInputSchema>
|
|||
|
||||
async call(input: EntityDetailsInput, context: ToolContext): Promise<ToolOutput<EntityDetailsStructured>> {
|
||||
const allowed = allowedConnectionNames(context);
|
||||
if (allowed && !allowed.has(input.connectionName)) {
|
||||
if (allowed && !allowed.has(input.connectionId)) {
|
||||
return {
|
||||
markdown: `Connection "${input.connectionName}" is not available to this ingest stage.`,
|
||||
markdown: `Connection "${input.connectionId}" is not available to this ingest stage.`,
|
||||
structured: { resolved: [], missing: [], scanAvailable: false },
|
||||
};
|
||||
}
|
||||
|
||||
const catalog = this.catalogFactory(context);
|
||||
const scanAvailable = await catalog.hasScan(input.connectionName);
|
||||
const scanAvailable = await catalog.hasScan(input.connectionId);
|
||||
if (!scanAvailable) {
|
||||
return {
|
||||
markdown: `No live-database scan available for connection "${input.connectionName}"; run \`ktx scan\` first.`,
|
||||
markdown: `No live-database scan available for connection "${input.connectionId}"; run \`ktx scan\` first.`,
|
||||
structured: { resolved: [], missing: [], scanAvailable: false },
|
||||
};
|
||||
}
|
||||
|
|
@ -128,13 +128,13 @@ export class EntityDetailsTool extends BaseTool<typeof entityDetailsInputSchema>
|
|||
const missing: EntityDetailsStructured['missing'] = [];
|
||||
|
||||
for (const target of input.targets) {
|
||||
const resolution = await resolveTarget(catalog, input.connectionName, target);
|
||||
const resolution = await resolveTarget(catalog, input.connectionId, target);
|
||||
if (!resolution.resolved) {
|
||||
missing.push({ target, candidates: resolution.candidates });
|
||||
appendMissingTargetMarkdown(parts, target, resolution.candidates);
|
||||
continue;
|
||||
}
|
||||
const detail = await catalog.getTable({ connectionName: input.connectionName, ...resolution.resolved });
|
||||
const detail = await catalog.getTable({ connectionId: input.connectionId, ...resolution.resolved });
|
||||
if (!detail) {
|
||||
missing.push({ target, candidates: resolution.candidates });
|
||||
appendMissingTargetMarkdown(parts, target, resolution.candidates);
|
||||
|
|
|
|||
|
|
@ -1,10 +1,10 @@
|
|||
import type { KtxFileStorePort } from '../../../core/index.js';
|
||||
import type { SlConnectionCatalogPort } from '../../../sl/index.js';
|
||||
import { WarehouseCatalogService } from '../../../scan/warehouse-catalog.js';
|
||||
import type { BaseTool, ToolContext } from '../../../tools/index.js';
|
||||
import { DiscoverDataTool } from './discover-data.tool.js';
|
||||
import { EntityDetailsTool } from './entity-details.tool.js';
|
||||
import { SqlExecutionTool } from './sql-execution.tool.js';
|
||||
import { WarehouseCatalogService } from './warehouse-catalog.service.js';
|
||||
|
||||
export function createWarehouseVerificationTools(deps: {
|
||||
connections: SlConnectionCatalogPort;
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ describe('SqlExecutionTool', () => {
|
|||
connections.executeQuery.mockResolvedValue({ headers: ['status'], rows: [['paid']], totalRows: 1 });
|
||||
|
||||
const result = await tool.call(
|
||||
{ connectionName: 'warehouse', sql: 'select status from public.orders', rowLimit: 5 },
|
||||
{ connectionId: 'warehouse', sql: 'select status from public.orders', rowLimit: 5 },
|
||||
context,
|
||||
);
|
||||
|
||||
|
|
@ -34,7 +34,7 @@ describe('SqlExecutionTool', () => {
|
|||
it.each(['insert into x values (1)', 'drop table x', 'vacuum'])('rejects mutating SQL: %s', async (sql) => {
|
||||
connections.executeQuery.mockClear();
|
||||
|
||||
const result = await tool.call({ connectionName: 'warehouse', sql }, context);
|
||||
const result = await tool.call({ connectionId: 'warehouse', sql }, context);
|
||||
|
||||
expect(result.markdown).toContain('Only read-only SELECT/WITH queries can be executed locally.');
|
||||
expect(connections.executeQuery).not.toHaveBeenCalled();
|
||||
|
|
@ -44,11 +44,35 @@ describe('SqlExecutionTool', () => {
|
|||
connections.executeQuery.mockRejectedValue(new Error('relation "orbit_analytics.customer" does not exist'));
|
||||
|
||||
const result = await tool.call(
|
||||
{ connectionName: 'warehouse', sql: 'select 1 from orbit_analytics.customer', rowLimit: 1 },
|
||||
{ connectionId: 'warehouse', sql: 'select 1 from orbit_analytics.customer', rowLimit: 1 },
|
||||
context,
|
||||
);
|
||||
|
||||
expect(result.markdown).toContain('relation "orbit_analytics.customer" does not exist');
|
||||
expect(result.structured.error).toContain('relation "orbit_analytics.customer" does not exist');
|
||||
});
|
||||
|
||||
it('uses connectionId as the public input field', () => {
|
||||
const legacyConnectionField = ['connection', 'Name'].join('');
|
||||
|
||||
expect(
|
||||
tool.parseInput({
|
||||
connectionId: 'warehouse',
|
||||
sql: 'select 1',
|
||||
rowLimit: 5,
|
||||
}),
|
||||
).toEqual({
|
||||
connectionId: 'warehouse',
|
||||
sql: 'select 1',
|
||||
rowLimit: 5,
|
||||
});
|
||||
|
||||
expect(() =>
|
||||
tool.parseInput({
|
||||
[legacyConnectionField]: 'warehouse',
|
||||
sql: 'select 1',
|
||||
rowLimit: 5,
|
||||
}),
|
||||
).toThrow();
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -4,10 +4,10 @@ import type { SlConnectionCatalogPort } from '../../../sl/index.js';
|
|||
import { BaseTool, type ToolContext, type ToolOutput } from '../../../tools/index.js';
|
||||
|
||||
const sqlExecutionInputSchema = z.object({
|
||||
connectionName: z.string().regex(/^[a-zA-Z0-9][a-zA-Z0-9_-]*$/),
|
||||
connectionId: z.string().regex(/^[a-zA-Z0-9][a-zA-Z0-9_-]*$/),
|
||||
sql: z.string().min(1),
|
||||
rowLimit: z.number().int().positive().max(1000).optional().default(100),
|
||||
});
|
||||
}).strict();
|
||||
|
||||
type SqlExecutionInput = z.input<typeof sqlExecutionInputSchema>;
|
||||
|
||||
|
|
@ -54,9 +54,9 @@ export class SqlExecutionTool extends BaseTool<typeof sqlExecutionInputSchema> {
|
|||
|
||||
async call(input: SqlExecutionInput, context: ToolContext): Promise<ToolOutput<SqlExecutionStructured>> {
|
||||
const allowed = context.session?.allowedConnectionNames;
|
||||
if (allowed && !allowed.has(input.connectionName)) {
|
||||
if (allowed && !allowed.has(input.connectionId)) {
|
||||
return {
|
||||
markdown: `Connection "${input.connectionName}" is not available to this ingest stage.`,
|
||||
markdown: `Connection "${input.connectionId}" is not available to this ingest stage.`,
|
||||
structured: {
|
||||
headers: [],
|
||||
rows: [],
|
||||
|
|
@ -83,7 +83,7 @@ export class SqlExecutionTool extends BaseTool<typeof sqlExecutionInputSchema> {
|
|||
}
|
||||
|
||||
try {
|
||||
const result = await this.connections.executeQuery(input.connectionName, wrappedSql);
|
||||
const result = await this.connections.executeQuery(input.connectionId, wrappedSql);
|
||||
const headers = result.headers ?? [];
|
||||
const rows = result.rows ?? [];
|
||||
const rowCount = result.totalRows ?? rows.length;
|
||||
|
|
|
|||
|
|
@ -143,6 +143,45 @@ const scanArtifactReadSchema = z.object({
|
|||
path: z.string().min(1),
|
||||
});
|
||||
|
||||
const entityDetailsTableRefSchema = z.object({
|
||||
catalog: z.string().nullable(),
|
||||
db: z.string().nullable(),
|
||||
name: z.string().min(1),
|
||||
});
|
||||
|
||||
const entityDetailsSchema = z.object({
|
||||
connectionId: connectionIdSchema,
|
||||
entities: z
|
||||
.array(
|
||||
z.object({
|
||||
table: z.union([z.string().min(1), entityDetailsTableRefSchema]),
|
||||
columns: z.array(z.string().min(1)).optional(),
|
||||
}),
|
||||
)
|
||||
.min(1)
|
||||
.max(20),
|
||||
});
|
||||
|
||||
/**
 * Input schema for the `dictionary_search` tool: 1 to 20 non-empty values to
 * look up, optionally restricted to a single connection.
 */
const dictionarySearchSchema = z.object({
  values: z.array(z.string().min(1)).min(1).max(20),
  connectionId: connectionIdSchema.optional(),
});
|
||||
|
||||
/** Result kinds that a `discover_data` request may be restricted to. */
const discoverDataKindSchema = z.enum([
  'wiki',
  'sl_source',
  'sl_measure',
  'sl_dimension',
  'table',
  'column',
]);
|
||||
|
||||
/**
 * Input schema for the `discover_data` tool: a non-empty query, optionally
 * scoped to one connection and a subset of result kinds, with a result limit
 * between 1 and 50.
 */
const discoverDataSchema = z.object({
  query: z.string().min(1),
  connectionId: connectionIdSchema.optional(),
  kinds: z.array(discoverDataKindSchema).optional(),
  // NOTE(review): with `.default(15).optional()` the outer optional may
  // short-circuit `undefined` before the default is applied (zod 3 behaviour),
  // leaving `limit` undefined after parsing. Confirm the discover service
  // applies its own default, or swap the order to `.optional().default(15)`.
  limit: z.number().int().min(1).max(50).default(15).optional(),
});
|
||||
|
||||
/**
 * Input schema for the `sql_execution` tool: a connection id, a non-empty SQL
 * string and a row cap between 1 and 10,000.
 */
const sqlExecutionSchema = z.object({
  connectionId: connectionIdSchema,
  sql: z.string().min(1),
  // NOTE(review): `.default(1000).optional()` may leave `maxRows` undefined
  // after parsing depending on the zod version; consumers should keep their
  // own fallback. TODO confirm against the installed zod release.
  maxRows: z.number().int().min(1).max(10_000).default(1000).optional(),
});
|
||||
|
||||
export function jsonToolResult<T extends object>(structuredContent: T): KtxMcpToolResult<T> {
|
||||
return {
|
||||
content: [{ type: 'text', text: JSON.stringify(structuredContent, null, 2) }],
|
||||
|
|
@ -361,6 +400,81 @@ export function registerKtxContextTools(deps: RegisterKtxContextToolsDeps): void
|
|||
);
|
||||
}
|
||||
|
||||
// Register `entity_details` only when the host supplied an entity-details port.
if (ports.entityDetails) {
  const port = ports.entityDetails;
  registerParsedTool(
    server,
    'entity_details',
    {
      title: 'Entity Details',
      description: 'Read raw table and column metadata from the latest KTX live-database scan snapshot.',
      inputSchema: entityDetailsSchema.shape,
    },
    entityDetailsSchema,
    async (input) => {
      const details = await port.read(input);
      return jsonToolResult(details);
    },
  );
}
|
||||
|
||||
// Register `dictionary_search` only when the host supplied a dictionary-search port.
if (ports.dictionarySearch) {
  const port = ports.dictionarySearch;
  registerParsedTool(
    server,
    'dictionary_search',
    {
      title: 'Dictionary Search',
      description:
        'Search profile-sampled warehouse values and report matching connection/source/column locations plus non-authoritative miss reasons.',
      inputSchema: dictionarySearchSchema.shape,
    },
    dictionarySearchSchema,
    async (input) => {
      const found = await port.search(input);
      return jsonToolResult(found);
    },
  );
}
|
||||
|
||||
// Register `discover_data` only when the host supplied a discover port.
if (ports.discover) {
  const port = ports.discover;
  registerParsedTool(
    server,
    'discover_data',
    {
      title: 'Discover Data',
      description:
        'Search across KTX wiki pages, semantic-layer sources/measures/dimensions, and raw warehouse schema refs.',
      inputSchema: discoverDataSchema.shape,
    },
    discoverDataSchema,
    async (input) => {
      // NOTE(review): the discover port appears to resolve to an array, so the
      // tool's `structuredContent` is a JSON array rather than an object.
      // Confirm MCP clients accept that, or wrap the hits in an object.
      const hits = await port.search(input);
      return jsonToolResult(hits);
    },
  );
}
|
||||
|
||||
// Register `sql_execution` only when the host supplied a SQL execution port.
if (ports.sqlExecution) {
  const port = ports.sqlExecution;
  registerParsedTool(
    server,
    'sql_execution',
    {
      title: 'SQL Execution',
      description:
        'Execute one parser-validated read-only SQL query against a configured KTX connection and return structured rows.',
      inputSchema: sqlExecutionSchema.shape,
    },
    sqlExecutionSchema,
    async (input) => {
      const { connectionId, sql } = input;
      // Fall back to 1000 rows when the parsed input leaves `maxRows` unset.
      const maxRows = input.maxRows ?? 1000;
      try {
        const response = await port.execute({ connectionId, sql, maxRows });
        return jsonToolResult(response);
      } catch (error) {
        // Port failures are reported as an error tool result instead of being rethrown.
        const message = error instanceof Error ? error.message : String(error);
        return jsonErrorToolResult(message);
      }
    },
  );
}
|
||||
|
||||
if (ports.ingest) {
|
||||
const ingest = ports.ingest;
|
||||
registerParsedTool(
|
||||
|
|
|
|||
|
|
@ -5,6 +5,9 @@ export { createDefaultKtxMcpServer, createKtxMcpServer } from './server.js';
|
|||
export type {
|
||||
KtxConnectionSummary,
|
||||
KtxConnectionsMcpPort,
|
||||
KtxDiscoverDataMcpPort,
|
||||
KtxDictionarySearchMcpPort,
|
||||
KtxEntityDetailsMcpPort,
|
||||
KtxIngestDiffSummary,
|
||||
KtxIngestMcpPort,
|
||||
KtxIngestStatusResponse,
|
||||
|
|
|
|||
|
|
@ -5,7 +5,12 @@ import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
|||
import { AgentRunnerService } from '../agent/index.js';
|
||||
import { FakeSourceAdapter, type MemoryFlowReplayInput } from '../ingest/index.js';
|
||||
import { initKtxProject } from '../project/index.js';
|
||||
import { createKtxConnectorCapabilities, type KtxScanConnector, type KtxSchemaSnapshot } from '../scan/index.js';
|
||||
import {
|
||||
createKtxConnectorCapabilities,
|
||||
type KtxQueryResult,
|
||||
type KtxScanConnector,
|
||||
type KtxSchemaSnapshot,
|
||||
} from '../scan/index.js';
|
||||
import { writeLocalSlSource } from '../sl/index.js';
|
||||
import { createLocalProjectMcpContextPorts } from './local-project-ports.js';
|
||||
|
||||
|
|
@ -60,16 +65,119 @@ describe('createLocalProjectMcpContextPorts', () => {
|
|||
};
|
||||
}
|
||||
|
||||
/**
 * Builds a stub scan connector for tests.
 *
 * @param snapshot - Schema snapshot returned by the stubbed `introspect`.
 * @param queryResult - When provided, the connector advertises read-only SQL
 *   support and `executeReadOnly` resolves to this result; when omitted,
 *   `executeReadOnly` is left undefined and the capability is off.
 */
function testConnector(snapshot = testSnapshot(), queryResult?: KtxQueryResult): KtxScanConnector {
  const supportsSql = queryResult !== undefined;
  return {
    id: `test:${snapshot.connectionId}`,
    driver: snapshot.driver,
    capabilities: createKtxConnectorCapabilities({ readOnlySql: supportsSql }),
    introspect: vi.fn(async () => snapshot),
    executeReadOnly: queryResult === undefined ? undefined : vi.fn(async () => queryResult),
    cleanup: vi.fn(async () => {}),
  };
}
|
||||
|
||||
/**
 * Writes a minimal live-database scan for the `warehouse` connection under
 * `raw-sources/warehouse/live-database/<syncId>` inside `projectDir`:
 * `connection.json`, one table file (`tables/orders.json`) and
 * `scan-report.json`.
 *
 * @param projectDir - Root directory of the test project.
 * @param syncId - Sync folder name; also recorded in the scan report.
 */
async function seedScanReport(projectDir: string, syncId = 'sync-1'): Promise<void> {
  const root = `raw-sources/warehouse/live-database/${syncId}`;

  // Serialises `payload` as pretty-printed JSON at <projectDir>/<root>/<segments...>.
  const writeJson = (segments: string[], payload: unknown): Promise<void> =>
    writeFile(join(projectDir, root, ...segments), JSON.stringify(payload, null, 2), 'utf-8');

  await mkdir(join(projectDir, root, 'tables'), { recursive: true });

  await writeJson(['connection.json'], {
    connectionId: 'warehouse',
    driver: 'postgres',
    extractedAt: '2026-05-14T09:00:00.000Z',
    scope: { schemas: ['public'] },
  });

  await writeJson(['tables', 'orders.json'], {
    catalog: null,
    db: 'public',
    name: 'orders',
    kind: 'table',
    comment: 'Customer orders',
    estimatedRows: 12,
    columns: [
      {
        name: 'id',
        nativeType: 'integer',
        normalizedType: 'integer',
        dimensionType: 'number',
        nullable: false,
        primaryKey: true,
        comment: null,
      },
    ],
    foreignKeys: [],
  });

  await writeJson(['scan-report.json'], {
    connectionId: 'warehouse',
    driver: 'postgres',
    syncId,
    runId: 'scan-1',
    trigger: 'mcp',
    mode: 'structural',
    dryRun: false,
    artifactPaths: {
      rawSourcesDir: root,
      reportPath: `${root}/scan-report.json`,
      manifestShards: [],
      enrichmentArtifacts: [],
    },
    diffSummary: {
      tablesAdded: 0,
      tablesModified: 0,
      tablesDeleted: 0,
      tablesUnchanged: 1,
      columnsAdded: 0,
      columnsModified: 0,
      columnsDeleted: 0,
    },
    manifestShardsWritten: 0,
    structuralSyncStats: {
      tablesCreated: 1,
      tablesUpdated: 0,
      tablesDeleted: 0,
      columnsCreated: 0,
      columnsUpdated: 0,
      columnsDeleted: 0,
    },
    enrichment: {
      dataDictionary: 'skipped',
      tableDescriptions: 'skipped',
      columnDescriptions: 'skipped',
      embeddings: 'skipped',
      deterministicRelationships: 'skipped',
      llmRelationshipValidation: 'skipped',
      statisticalValidation: 'skipped',
    },
    capabilityGaps: [],
    warnings: [],
    relationships: { accepted: 0, review: 0, rejected: 0, skipped: 0 },
    enrichmentState: { resumedStages: [], completedStages: [], failedStages: [] },
    createdAt: '2026-05-14T09:00:00.000Z',
  });
}
|
||||
|
||||
it('lists local project connections from ktx.yaml', async () => {
|
||||
const project = await initKtxProject({ projectDir: tempDir });
|
||||
project.config.connections.warehouse = {
|
||||
|
|
@ -119,6 +227,382 @@ describe('createLocalProjectMcpContextPorts', () => {
|
|||
expect(connector.cleanup).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
// Happy path: validation succeeds, the connector runs the query and is cleaned up.
it('executes MCP SQL only after parser-backed validation passes', async () => {
  const project = await initKtxProject({ projectDir: tempDir });
  project.config.connections.warehouse = {
    driver: 'postgres',
    url: 'env:DATABASE_URL',
  };
  const connector = testConnector(testSnapshot(), {
    headers: ['id'],
    headerTypes: ['integer'],
    rows: [[1]],
    totalRows: 1,
    rowCount: 1,
  });
  const createConnector = vi.fn(async () => connector);
  const sqlAnalysis = {
    analyzeForFingerprint: vi.fn(),
    analyzeBatch: vi.fn(),
    validateReadOnly: vi.fn(async () => ({ ok: true, error: null })),
  };
  const ports = createLocalProjectMcpContextPorts(project, {
    sqlAnalysis,
    localScan: { createConnector },
  });

  const sql = 'select id from public.orders';
  const execution = ports.sqlExecution?.execute({ connectionId: 'warehouse', sql, maxRows: 5 });

  await expect(execution).resolves.toEqual({
    headers: ['id'],
    headerTypes: ['integer'],
    rows: [[1]],
    rowCount: 1,
  });
  expect(sqlAnalysis.validateReadOnly).toHaveBeenCalledWith(sql, 'postgres');
  expect(createConnector).toHaveBeenCalledWith('warehouse');
  expect(connector.executeReadOnly).toHaveBeenCalledWith(
    { connectionId: 'warehouse', sql, maxRows: 5 },
    { runId: 'mcp-sql-execution' },
  );
  expect(connector.cleanup).toHaveBeenCalled();
});
|
||||
|
||||
// Failure path: a validation error must surface before the connector is asked to run anything.
it('rejects MCP SQL before connector execution when parser validation fails', async () => {
  const project = await initKtxProject({ projectDir: tempDir });
  project.config.connections.warehouse = {
    driver: 'postgres',
    url: 'env:DATABASE_URL',
  };
  const connector = testConnector(testSnapshot(), {
    headers: ['id'],
    rows: [[1]],
    totalRows: 1,
    rowCount: 1,
  });
  const rejection = 'SQL contains read/write operation: Insert';
  const sqlAnalysis = {
    analyzeForFingerprint: vi.fn(),
    analyzeBatch: vi.fn(),
    validateReadOnly: vi.fn(async () => ({
      ok: false,
      error: rejection,
    })),
  };
  const ports = createLocalProjectMcpContextPorts(project, {
    sqlAnalysis,
    localScan: {
      createConnector: vi.fn(async () => connector),
    },
  });

  const attempt = ports.sqlExecution?.execute({
    connectionId: 'warehouse',
    sql: 'with x as (insert into t values (1) returning *) select * from x',
    maxRows: 1000,
  });

  await expect(attempt).rejects.toThrow(rejection);
  expect(connector.executeReadOnly).not.toHaveBeenCalled();
});
|
||||
|
||||
// Entity details are read back from the scan seeded on disk.
it('exposes local scan entity details through MCP ports', async () => {
  const project = await initKtxProject({ projectDir: tempDir });
  project.config.connections.warehouse = {
    driver: 'postgres',
    url: 'env:DATABASE_URL',
  };
  await seedScanReport(project.projectDir);
  const ports = createLocalProjectMcpContextPorts(project);

  const lookup = ports.entityDetails?.read({
    connectionId: 'warehouse',
    entities: [{ table: 'public.orders', columns: ['id'] }],
  });

  await expect(lookup).resolves.toMatchObject({
    results: [
      {
        ok: true,
        connectionId: 'warehouse',
        display: 'public.orders',
        columns: [{ name: 'id', nativeType: 'integer' }],
        snapshot: { syncId: 'sync-1', scanRunId: 'scan-1' },
      },
    ],
  });
});
|
||||
|
||||
// Without any seeded scan the port resolves with a `scan_missing` error entry.
it('returns a structured local entity details error when no scan exists', async () => {
  const project = await initKtxProject({ projectDir: tempDir });
  project.config.connections.warehouse = {
    driver: 'postgres',
    url: 'env:DATABASE_URL',
  };
  const ports = createLocalProjectMcpContextPorts(project);

  const lookup = ports.entityDetails?.read({
    connectionId: 'warehouse',
    entities: [{ table: 'public.orders' }],
  });

  await expect(lookup).resolves.toMatchObject({
    results: [
      {
        ok: false,
        connectionId: 'warehouse',
        error: { code: 'scan_missing' },
      },
    ],
  });
});
|
||||
|
||||
// A seeded relationship profile makes sampled values searchable through the port.
it('exposes local dictionary search through MCP ports', async () => {
  const project = await initKtxProject({ projectDir: tempDir });
  project.config.connections.warehouse = {
    driver: 'postgres',
    url: 'env:DATABASE_URL',
  };

  const profile = {
    connectionId: 'warehouse',
    driver: 'postgres',
    sqlAvailable: true,
    queryCount: 4,
    tables: [],
    columns: {
      'orders.status': {
        table: { catalog: null, db: 'public', name: 'orders' },
        column: 'status',
        nativeType: 'text',
        normalizedType: 'string',
        distinctCount: 2,
        sampleValues: ['paid', 'refunded'],
      },
    },
    warnings: [],
  };
  await project.fileStore.writeFile(
    'raw-sources/warehouse/live-database/sync-1/enrichment/relationship-profile.json',
    `${JSON.stringify(profile, null, 2)}\n`,
    'ktx',
    'ktx@example.com',
    'Seed dictionary profile',
  );

  const ports = createLocalProjectMcpContextPorts(project);
  const search = ports.dictionarySearch?.search({ values: ['paid'] });

  await expect(search).resolves.toMatchObject({
    searched: [{ connectionId: 'warehouse', status: 'ready' }],
    results: [
      {
        value: 'paid',
        matches: [{ connectionId: 'warehouse', sourceName: 'orders', columnName: 'status', matchedValue: 'paid' }],
        misses: [],
      },
    ],
  });
});
|
||||
|
||||
// With no profile artifact on disk, every value is reported as a miss.
it('reports missing local dictionary profiles through MCP ports', async () => {
  const project = await initKtxProject({ projectDir: tempDir });
  project.config.connections.warehouse = {
    driver: 'postgres',
    url: 'env:DATABASE_URL',
  };

  const ports = createLocalProjectMcpContextPorts(project);
  const search = ports.dictionarySearch?.search({ values: ['paid'] });

  await expect(search).resolves.toEqual({
    searched: [
      {
        connectionId: 'warehouse',
        coverage: {
          sampledRows: null,
          valuesPerColumn: null,
          profiledColumns: 0,
          syncId: null,
          profiledAt: null,
        },
        status: 'no_profile_artifact',
      },
    ],
    results: [
      {
        value: 'paid',
        matches: [],
        misses: [{ connectionId: 'warehouse', reason: 'no_profile_artifact' }],
      },
    ],
  });
});
|
||||
|
||||
// Seeds a wiki page, a semantic-layer source and a raw scan, then checks that
// one discover query surfaces a hit from each of them.
it('exposes local project discover_data across wiki, semantic-layer, and raw schema', async () => {
  const project = await initKtxProject({ projectDir: tempDir });
  project.config.connections.warehouse = {
    driver: 'postgres',
    url: 'env:DATABASE_URL',
  };

  // Every fixture is written with the same author identity.
  const seed = (path: string, content: string, message: string) =>
    project.fileStore.writeFile(path, content, 'ktx', 'ktx@example.com', message);

  const wikiPage = [
    '---',
    'summary: Paid order operations',
    'tags: [orders]',
    'refs: []',
    'sl_refs: []',
    'usage_mode: auto',
    '---',
    '',
    'Paid orders are used for customer activity analysis.',
    '',
  ].join('\n');
  await seed('wiki/global/orders-playbook.md', wikiPage, 'seed wiki');

  const slSource = [
    'name: orders',
    'descriptions:',
    ' user: Paid order facts',
    'table: public.orders',
    'grain: [id]',
    'columns:',
    ' - name: status',
    ' type: string',
    ' descriptions:',
    ' user: Payment status',
    'measures:',
    ' - name: order_count',
    ' expr: count(*)',
    ' description: Number of paid orders',
    '',
  ].join('\n');
  await seed('semantic-layer/warehouse/orders.yaml', slSource, 'seed sl');

  await seed(
    'raw-sources/warehouse/live-database/sync-1/connection.json',
    JSON.stringify({ connectionId: 'warehouse', driver: 'postgres', extractedAt: '2026-05-14T09:00:00.000Z' }, null, 2),
    'seed connection',
  );

  const ordersTable = {
    catalog: null,
    db: 'public',
    name: 'orders',
    kind: 'table',
    comment: 'Orders table',
    estimatedRows: 10,
    columns: [
      {
        name: 'status',
        nativeType: 'text',
        normalizedType: 'text',
        dimensionType: 'string',
        nullable: false,
        primaryKey: false,
        comment: 'Order status',
        sampleValues: ['paid'],
      },
    ],
    foreignKeys: [],
  };
  await seed(
    'raw-sources/warehouse/live-database/sync-1/tables/public-orders.json',
    JSON.stringify(ordersTable, null, 2),
    'seed table',
  );

  const scanReport = {
    connectionId: 'warehouse',
    driver: 'postgres',
    syncId: 'sync-1',
    runId: 'scan-1',
    trigger: 'mcp',
    mode: 'enriched',
    dryRun: false,
    artifactPaths: {
      rawSourcesDir: 'raw-sources/warehouse/live-database/sync-1',
      reportPath: 'raw-sources/warehouse/live-database/sync-1/scan-report.json',
      manifestShards: [],
      enrichmentArtifacts: [],
    },
    diffSummary: {
      tablesAdded: 1,
      tablesModified: 0,
      tablesDeleted: 0,
      tablesUnchanged: 0,
      columnsAdded: 0,
      columnsModified: 0,
      columnsDeleted: 0,
    },
    manifestShardsWritten: 0,
    structuralSyncStats: {
      tablesCreated: 0,
      tablesUpdated: 0,
      tablesDeleted: 0,
      columnsCreated: 0,
      columnsUpdated: 0,
      columnsDeleted: 0,
    },
    enrichment: {
      dataDictionary: 'completed',
      tableDescriptions: 'completed',
      columnDescriptions: 'completed',
      embeddings: 'skipped',
      deterministicRelationships: 'skipped',
      llmRelationshipValidation: 'skipped',
      statisticalValidation: 'skipped',
    },
    capabilityGaps: [],
    warnings: [],
    relationships: { accepted: 0, review: 0, rejected: 0, skipped: 0 },
    enrichmentState: { resumedStages: [], completedStages: [], failedStages: [] },
    createdAt: '2026-05-14T09:00:00.000Z',
  };
  await seed(
    'raw-sources/warehouse/live-database/sync-1/scan-report.json',
    JSON.stringify(scanReport, null, 2),
    'seed scan report',
  );

  const ports = createLocalProjectMcpContextPorts(project);
  const results = await ports.discover?.search({ query: 'paid orders', connectionId: 'warehouse', limit: 10 });

  expect(results).toEqual(
    expect.arrayContaining([
      expect.objectContaining({ kind: 'wiki', id: 'orders-playbook' }),
      expect.objectContaining({ kind: 'sl_source', id: 'orders', connectionId: 'warehouse' }),
      expect.objectContaining({ kind: 'table', id: 'public.orders', connectionId: 'warehouse' }),
    ]),
  );
});
|
||||
|
||||
it('triggers canonical bundle ingest and reads status, report, and replay through MCP ports', async () => {
|
||||
const project = await initKtxProject({ projectDir: tempDir });
|
||||
project.config.connections.warehouse = {
|
||||
|
|
|
|||
|
|
@ -18,6 +18,7 @@ import {
|
|||
import { createLocalKtxEmbeddingProviderFromConfig, KtxIngestEmbeddingPortAdapter } from '../llm/index.js';
|
||||
import type { KtxLocalProject } from '../project/index.js';
|
||||
import {
|
||||
createKtxEntityDetailsService,
|
||||
getLocalScanReport,
|
||||
getLocalScanStatus,
|
||||
type KtxConnectionDriver,
|
||||
|
|
@ -26,8 +27,11 @@ import {
|
|||
type LocalScanMcpOptions,
|
||||
runLocalScan,
|
||||
} from '../scan/index.js';
|
||||
import { createKtxDiscoverDataService } from '../search/index.js';
|
||||
import type { SqlAnalysisDialect, SqlAnalysisPort } from '../sql-analysis/index.js';
|
||||
import {
|
||||
compileLocalSlQuery,
|
||||
createKtxDictionarySearchService,
|
||||
type LocalSlSourceSearchResult,
|
||||
type LocalSlSourceSummary,
|
||||
listLocalSlSources,
|
||||
|
|
@ -44,6 +48,7 @@ import type {
|
|||
KtxScanArtifactReadResponse,
|
||||
KtxScanArtifactSummary,
|
||||
KtxScanArtifactType,
|
||||
KtxSqlExecutionResponse,
|
||||
} from './types.js';
|
||||
|
||||
const LOCAL_AUTHOR = 'ktx';
|
||||
|
|
@ -53,6 +58,7 @@ const SL_SHAPE_WARNING = 'Local stdio validation checks YAML shape only; Python
|
|||
interface CreateLocalProjectMcpContextPortsOptions {
|
||||
semanticLayerCompute?: KtxSemanticLayerComputePort;
|
||||
queryExecutor?: KtxSqlQueryExecutorPort;
|
||||
sqlAnalysis?: SqlAnalysisPort;
|
||||
localIngest?: LocalIngestMcpOptions;
|
||||
localScan?: LocalScanMcpOptions;
|
||||
embeddingService?: KtxEmbeddingPort | null;
|
||||
|
|
@ -77,6 +83,10 @@ function dialectForDriver(driver: string | undefined): string {
|
|||
return map[normalized] ?? 'postgres';
|
||||
}
|
||||
|
||||
/**
 * Resolves the SQL-analysis dialect for a connection driver by delegating to
 * `dialectForDriver`.
 */
function sqlAnalysisDialectForDriver(driver: string | undefined): SqlAnalysisDialect {
  const dialect = dialectForDriver(driver);
  // NOTE(review): unchecked cast from string; assumes every value produced by
  // `dialectForDriver` is a valid `SqlAnalysisDialect`. Confirm.
  return dialect as SqlAnalysisDialect;
}
|
||||
|
||||
function assertSafePathToken(kind: string, value: string): string {
|
||||
if (
|
||||
value.trim().length === 0 ||
|
||||
|
|
@ -378,6 +388,53 @@ function statusFromIngestReport(report: IngestReportSnapshot): KtxIngestStatusRe
|
|||
};
|
||||
}
|
||||
|
||||
/**
 * Runs one SQL statement against a configured connection after it has passed
 * read-only validation.
 *
 * Steps, in order: check the connection id and look it up in the project
 * config, require the SQL-analysis port, validate the statement for the
 * connection's dialect, require a connector factory, then create the
 * connector and execute. The connector is handed to `cleanupConnector` in a
 * `finally`, whether execution succeeded or threw.
 *
 * @param project - Local project whose `config.connections` is consulted.
 * @param options - Supplies `sqlAnalysis` and `localScan.createConnector`.
 * @param input - Connection id, SQL text and the row cap forwarded to the connector.
 * @returns Headers, optional header types, rows and a row count (the
 *   connector's `rowCount`, or the number of returned rows when absent).
 * @throws Error when the connection is not configured, validation is
 *   unavailable or fails, no connector factory exists, or the connector lacks
 *   read-only SQL support.
 */
async function executeValidatedReadOnlySql(
  project: KtxLocalProject,
  options: CreateLocalProjectMcpContextPortsOptions,
  input: { connectionId: string; sql: string; maxRows: number },
): Promise<KtxSqlExecutionResponse> {
  const connectionId = assertSafeConnectionId(input.connectionId);
  const connection = project.config.connections[connectionId];
  if (!connection) {
    throw new Error(`Connection "${connectionId}" is not configured in ktx.yaml`);
  }

  const { sqlAnalysis } = options;
  if (!sqlAnalysis) {
    throw new Error('sql_execution requires parser-backed SQL validation.');
  }

  const dialect = sqlAnalysisDialectForDriver(connection.driver);
  const verdict = await sqlAnalysis.validateReadOnly(input.sql, dialect);
  if (!verdict.ok) {
    throw new Error(verdict.error ?? 'SQL is not read-only.');
  }

  const createConnector = options.localScan?.createConnector;
  if (!createConnector) {
    throw new Error('sql_execution requires a local scan connector factory.');
  }

  let connector: KtxScanConnector | null = null;
  try {
    connector = await createConnector(connectionId);
    // Both the advertised capability and the method itself must be present.
    if (!(connector.capabilities.readOnlySql && connector.executeReadOnly)) {
      throw new Error(`Connection "${connectionId}" does not support read-only SQL execution.`);
    }

    const result = await connector.executeReadOnly(
      { connectionId, sql: input.sql, maxRows: input.maxRows },
      { runId: 'mcp-sql-execution' },
    );

    const { headers, headerTypes, rows } = result;
    const rowCount = result.rowCount ?? rows.length;
    // `headerTypes` is only included when the connector reported it; key
    // order is kept stable for serialised output.
    return headerTypes ? { headers, headerTypes, rows, rowCount } : { headers, rows, rowCount };
  } finally {
    await cleanupConnector(connector);
  }
}
|
||||
|
||||
export function createLocalProjectMcpContextPorts(
|
||||
project: KtxLocalProject,
|
||||
options: CreateLocalProjectMcpContextPortsOptions = {},
|
||||
|
|
@ -575,8 +632,31 @@ export function createLocalProjectMcpContextPorts(
|
|||
});
|
||||
},
|
||||
},
|
||||
// Research ports: each call builds its service from the project and delegates to it.
entityDetails: {
  read: async (input) => createKtxEntityDetailsService(project).read(input),
},
dictionarySearch: {
  search: async (input) => createKtxDictionarySearchService(project).search(input),
},
discover: {
  search: async (input) => {
    const service = createKtxDiscoverDataService(project, { userId: 'local', embeddingService });
    return service.search(input);
  },
},
|
||||
};
|
||||
|
||||
// SQL execution is exposed only when both a SQL-analysis port and a connector
// factory were supplied.
if (options.sqlAnalysis && options.localScan?.createConnector) {
  ports.sqlExecution = {
    execute: async (input) => executeValidatedReadOnlySql(project, options, input),
  };
}
|
||||
|
||||
if (options.localIngest) {
|
||||
ports.ingest = {
|
||||
async trigger(input) {
|
||||
|
|
|
|||
|
|
@ -6,11 +6,16 @@ import { createLocalProjectMemoryCapture } from '../memory/index.js';
|
|||
import { initKtxProject } from '../project/index.js';
|
||||
import { createKtxMcpServer } from './server.js';
|
||||
import type {
|
||||
KtxDiscoverDataMcpPort,
|
||||
KtxDictionarySearchMcpPort,
|
||||
KtxEntityDetailsMcpPort,
|
||||
KtxIngestMcpPort,
|
||||
KtxKnowledgeMcpPort,
|
||||
KtxMcpContextPorts,
|
||||
KtxScanMcpPort,
|
||||
KtxSemanticLayerMcpPort,
|
||||
KtxSqlExecutionMcpPort,
|
||||
KtxSqlExecutionResponse,
|
||||
MemoryCapturePort,
|
||||
} from './types.js';
|
||||
|
||||
|
|
@ -64,6 +69,242 @@ describe('createKtxMcpServer', () => {
|
|||
});
|
||||
});
|
||||
|
||||
// With only a SQL execution port, `sql_execution` is the sole registered tool and
// its handler returns the port's response as both text and structured content.
it('registers parser-gated sql_execution when the host provides a SQL execution port', async () => {
  const fake = makeFakeServer();
  const response: KtxSqlExecutionResponse = {
    headers: ['status', 'count'],
    headerTypes: ['text', 'bigint'],
    rows: [['paid', 42]],
    rowCount: 1,
  };
  const sqlExecution: KtxSqlExecutionMcpPort = {
    execute: vi.fn<KtxSqlExecutionMcpPort['execute']>().mockResolvedValue(response),
  };

  createKtxMcpServer({
    server: fake.server,
    userContext: { userId: 'local-user' },
    contextTools: { sqlExecution },
  });

  const args = {
    connectionId: 'warehouse',
    sql: 'select status, count(*) from public.orders group by status',
    maxRows: 50,
  };
  // Kept separate from `response` so the assertion does not share a reference with the mock.
  const expectedPayload = {
    headers: ['status', 'count'],
    headerTypes: ['text', 'bigint'],
    rows: [['paid', 42]],
    rowCount: 1,
  };

  expect(fake.tools.map(({ name }) => name)).toEqual(['sql_execution']);
  await expect(getTool(fake.tools, 'sql_execution').handler(args)).resolves.toEqual({
    content: [
      {
        type: 'text',
        text: JSON.stringify(expectedPayload, null, 2),
      },
    ],
    structuredContent: expectedPayload,
  });
  expect(sqlExecution.execute).toHaveBeenCalledWith({
    connectionId: 'warehouse',
    sql: 'select status, count(*) from public.orders group by status',
    maxRows: 50,
  });
});
|
||||
|
||||
// With only an entity-details port, `entity_details` is registered and forwards its input unchanged.
it('registers entity_details when the host provides an entity-details port', async () => {
  const fake = makeFakeServer();
  const entityDetails: KtxEntityDetailsMcpPort = {
    read: vi.fn<KtxEntityDetailsMcpPort['read']>().mockResolvedValue({
      results: [
        {
          ok: true,
          connectionId: 'warehouse',
          tableRef: { catalog: null, db: 'public', name: 'orders' },
          display: 'public.orders',
          kind: 'table',
          comment: 'Customer orders',
          estimatedRows: 12,
          columns: [
            {
              name: 'id',
              nativeType: 'integer',
              normalizedType: 'integer',
              dimensionType: 'number',
              nullable: false,
              primaryKey: true,
              comment: null,
            },
          ],
          foreignKeys: [],
          snapshot: {
            syncId: 'sync-1',
            extractedAt: '2026-05-14T09:00:00.000Z',
            scanRunId: 'scan-1',
          },
        },
      ],
    }),
  };

  createKtxMcpServer({
    server: fake.server,
    userContext: { userId: 'local-user' },
    contextTools: { entityDetails },
  });

  expect(fake.tools.map(({ name }) => name)).toEqual(['entity_details']);

  const outcome = getTool(fake.tools, 'entity_details').handler({
    connectionId: 'warehouse',
    entities: [{ table: 'public.orders', columns: ['id'] }],
  });
  await expect(outcome).resolves.toMatchObject({
    structuredContent: {
      results: [
        {
          ok: true,
          connectionId: 'warehouse',
          display: 'public.orders',
          columns: [{ name: 'id' }],
        },
      ],
    },
  });
  expect(entityDetails.read).toHaveBeenCalledWith({
    connectionId: 'warehouse',
    entities: [{ table: 'public.orders', columns: ['id'] }],
  });
});
|
||||
|
||||
// With only a dictionary-search port, `dictionary_search` is registered and forwards its input unchanged.
it('registers dictionary_search when the host provides a dictionary-search port', async () => {
  const fake = makeFakeServer();
  const dictionarySearch: KtxDictionarySearchMcpPort = {
    search: vi.fn<KtxDictionarySearchMcpPort['search']>().mockResolvedValue({
      searched: [
        {
          connectionId: 'warehouse',
          coverage: {
            sampledRows: null,
            valuesPerColumn: null,
            profiledColumns: 1,
            syncId: 'sync-1',
            profiledAt: null,
          },
          status: 'ready',
        },
      ],
      results: [
        {
          value: 'paid',
          matches: [
            {
              connectionId: 'warehouse',
              sourceName: 'orders',
              columnName: 'status',
              matchedValue: 'paid',
              cardinality: 3,
            },
          ],
          misses: [],
        },
      ],
    }),
  };

  createKtxMcpServer({
    server: fake.server,
    userContext: { userId: 'local-user' },
    contextTools: { dictionarySearch },
  });

  expect(fake.tools.map(({ name }) => name)).toEqual(['dictionary_search']);

  const outcome = getTool(fake.tools, 'dictionary_search').handler({
    connectionId: 'warehouse',
    values: ['paid'],
  });
  await expect(outcome).resolves.toMatchObject({
    structuredContent: {
      searched: [{ connectionId: 'warehouse', status: 'ready' }],
      results: [
        {
          value: 'paid',
          matches: [{ connectionId: 'warehouse', sourceName: 'orders', columnName: 'status' }],
          misses: [],
        },
      ],
    },
  });
  expect(dictionarySearch.search).toHaveBeenCalledWith({
    connectionId: 'warehouse',
    values: ['paid'],
  });
});
|
||||
|
||||
// With only a discover port, `discover_data` is registered and forwards its input unchanged.
it('registers discover_data when the host provides a discover port', async () => {
  const fake = makeFakeServer();
  const discover: KtxDiscoverDataMcpPort = {
    search: vi.fn<KtxDiscoverDataMcpPort['search']>().mockResolvedValue([
      {
        kind: 'table',
        id: 'public.orders',
        score: 1,
        summary: 'Orders table',
        snippet: 'id, status',
        matchedOn: 'name',
        connectionId: 'warehouse',
        tableRef: { catalog: null, db: 'public', name: 'orders' },
      },
    ]),
  };

  createKtxMcpServer({
    server: fake.server,
    userContext: { userId: 'local-user' },
    contextTools: { discover },
  });

  expect(fake.tools.map(({ name }) => name)).toEqual(['discover_data']);

  const outcome = getTool(fake.tools, 'discover_data').handler({
    query: 'orders',
    connectionId: 'warehouse',
    kinds: ['table'],
    limit: 5,
  });
  await expect(outcome).resolves.toMatchObject({
    structuredContent: [
      {
        kind: 'table',
        id: 'public.orders',
        connectionId: 'warehouse',
        tableRef: { catalog: null, db: 'public', name: 'orders' },
      },
    ],
  });
  expect(discover.search).toHaveBeenCalledWith({
    query: 'orders',
    connectionId: 'warehouse',
    kinds: ['table'],
    limit: 5,
  });
});
|
||||
|
||||
it('registers memory capture tools without host app dependencies', async () => {
|
||||
const fake = makeFakeServer();
|
||||
const capture: MemoryCapturePort = {
|
||||
|
|
|
|||
|
|
@ -1,7 +1,11 @@
|
|||
import type { IngestReportSnapshot, MemoryFlowReplayInput, TableUsageOutput } from '../ingest/index.js';
|
||||
import type { MemoryCaptureService } from '../memory/index.js';
|
||||
import type { KtxEntityDetailsInput, KtxEntityDetailsResponse } from '../scan/entity-details.js';
|
||||
import type { KtxScanMode, KtxScanReport } from '../scan/index.js';
|
||||
import type { KtxDiscoverDataInput, KtxDiscoverDataResponse } from '../search/index.js';
|
||||
import type {
|
||||
KtxDictionarySearchInput,
|
||||
KtxDictionarySearchResponse,
|
||||
SemanticLayerQueryInput,
|
||||
SlDictionaryMatch,
|
||||
SlSearchLaneSummary,
|
||||
|
|
@ -312,10 +316,37 @@ export interface KtxScanMcpPort {
|
|||
readArtifact?(input: { runId: string; path: string }): Promise<KtxScanArtifactReadResponse | null>;
|
||||
}
|
||||
|
||||
export interface KtxEntityDetailsMcpPort {
|
||||
read(input: KtxEntityDetailsInput): Promise<KtxEntityDetailsResponse>;
|
||||
}
|
||||
|
||||
export interface KtxDictionarySearchMcpPort {
|
||||
search(input: KtxDictionarySearchInput): Promise<KtxDictionarySearchResponse>;
|
||||
}
|
||||
|
||||
export interface KtxDiscoverDataMcpPort {
|
||||
search(input: KtxDiscoverDataInput): Promise<KtxDiscoverDataResponse>;
|
||||
}
|
||||
|
||||
export interface KtxSqlExecutionResponse {
|
||||
headers: string[];
|
||||
headerTypes?: string[];
|
||||
rows: unknown[][];
|
||||
rowCount: number;
|
||||
}
|
||||
|
||||
export interface KtxSqlExecutionMcpPort {
|
||||
execute(input: { connectionId: string; sql: string; maxRows: number }): Promise<KtxSqlExecutionResponse>;
|
||||
}
|
||||
|
||||
export interface KtxMcpContextPorts {
|
||||
connections?: KtxConnectionsMcpPort;
|
||||
knowledge?: KtxKnowledgeMcpPort;
|
||||
semanticLayer?: KtxSemanticLayerMcpPort;
|
||||
entityDetails?: KtxEntityDetailsMcpPort;
|
||||
dictionarySearch?: KtxDictionarySearchMcpPort;
|
||||
discover?: KtxDiscoverDataMcpPort;
|
||||
sqlExecution?: KtxSqlExecutionMcpPort;
|
||||
ingest?: KtxIngestMcpPort;
|
||||
scan?: KtxScanMcpPort;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -166,17 +166,17 @@ describe('memory runtime assets', () => {
|
|||
}
|
||||
});
|
||||
|
||||
it('ships only the KTX connectionName sql_execution call shape in writer guidance', async () => {
|
||||
it('ships only the KTX connectionId sql_execution call shape in writer guidance', async () => {
|
||||
const shared = await readFile(join(skillsDir, '_shared', 'identifier-verification.md'), 'utf-8');
|
||||
const bodies = [{ name: '_shared/identifier-verification.md', body: shared }];
|
||||
|
||||
expect(shared).toContain('sql_execution({connectionName, sql: "SELECT DISTINCT');
|
||||
expect(shared).toContain('sql_execution({connectionName, sql: "SELECT 1 FROM');
|
||||
expect(shared).toContain('sql_execution({connectionId, sql: "SELECT DISTINCT');
|
||||
expect(shared).toContain('sql_execution({connectionId, sql: "SELECT 1 FROM');
|
||||
|
||||
for (const skillName of verificationWriterSkills) {
|
||||
const body = await readFile(join(skillsDir, skillName, 'SKILL.md'), 'utf-8');
|
||||
bodies.push({ name: `${skillName}/SKILL.md`, body });
|
||||
expect(body).toContain('sql_execution({connectionName');
|
||||
expect(body).toContain('sql_execution({connectionId');
|
||||
expect(body).not.toContain('sql_execution({ sql');
|
||||
expect(body).not.toContain('session shape');
|
||||
expect(body).not.toContain('connection is already pinned by the ingest session');
|
||||
|
|
@ -186,8 +186,8 @@ describe('memory runtime assets', () => {
|
|||
const calls = sqlExecutionCallBlocks(body);
|
||||
expect(calls.length, `${name} should contain sql_execution guidance`).toBeGreaterThan(0);
|
||||
expect(
|
||||
calls.filter((call) => !call.includes('connectionName')),
|
||||
`${name} has sql_execution calls without connectionName`,
|
||||
calls.filter((call) => !call.includes('connectionId')),
|
||||
`${name} has sql_execution calls without connectionId`,
|
||||
).toEqual([]);
|
||||
expect(body, `${name} has a connectionless multiline sql_execution call`).not.toMatch(
|
||||
/sql_execution\(\{\s*sql\s*:/,
|
||||
|
|
|
|||
291
packages/context/src/scan/entity-details.test.ts
Normal file
291
packages/context/src/scan/entity-details.test.ts
Normal file
|
|
@ -0,0 +1,291 @@
|
|||
import { mkdtemp, rm } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
||||
import { initKtxProject, type KtxLocalProject } from '../project/index.js';
|
||||
import { createKtxEntityDetailsService } from './entity-details.js';
|
||||
import type { KtxConnectionDriver, KtxScanReport, KtxSchemaTable } from './types.js';
|
||||
|
||||
describe('createKtxEntityDetailsService', () => {
|
||||
let tempDir: string;
|
||||
let project: KtxLocalProject;
|
||||
|
||||
beforeEach(async () => {
|
||||
tempDir = await mkdtemp(join(tmpdir(), 'ktx-entity-details-service-'));
|
||||
project = await initKtxProject({ projectDir: join(tempDir, 'project') });
|
||||
});
|
||||
|
||||
afterEach(async () => {
|
||||
await rm(tempDir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
/**
 * Builds a structural, non-dry-run scan report fixture for one sync.
 *
 * The artifact paths are derived from the connection and sync ids; the driver
 * falls back to `postgres` and the creation timestamp to a fixed instant when
 * they are not supplied. Every enrichment stage is reported as skipped.
 */
function scanReport(input: {
  connectionId: string;
  syncId: string;
  runId: string;
  driver?: KtxConnectionDriver;
  createdAt?: string;
}): KtxScanReport {
  const { connectionId, syncId, runId } = input;
  const rawSourcesDir = ['raw-sources', connectionId, 'live-database', syncId].join('/');

  const artifactPaths: KtxScanReport['artifactPaths'] = {
    rawSourcesDir,
    reportPath: `${rawSourcesDir}/scan-report.json`,
    manifestShards: [],
    enrichmentArtifacts: [],
  };
  const diffSummary: KtxScanReport['diffSummary'] = {
    tablesAdded: 0,
    tablesModified: 0,
    tablesDeleted: 0,
    tablesUnchanged: 1,
    columnsAdded: 0,
    columnsModified: 0,
    columnsDeleted: 0,
  };
  const structuralSyncStats: KtxScanReport['structuralSyncStats'] = {
    tablesCreated: 1,
    tablesUpdated: 0,
    tablesDeleted: 0,
    columnsCreated: 0,
    columnsUpdated: 0,
    columnsDeleted: 0,
  };
  const enrichment: KtxScanReport['enrichment'] = {
    dataDictionary: 'skipped',
    tableDescriptions: 'skipped',
    columnDescriptions: 'skipped',
    embeddings: 'skipped',
    deterministicRelationships: 'skipped',
    llmRelationshipValidation: 'skipped',
    statisticalValidation: 'skipped',
  };

  // Key order is kept stable because the report is serialized with JSON.stringify.
  return {
    connectionId,
    driver: input.driver ?? 'postgres',
    syncId,
    runId,
    trigger: 'mcp',
    mode: 'structural',
    dryRun: false,
    artifactPaths,
    diffSummary,
    manifestShardsWritten: 0,
    structuralSyncStats,
    enrichment,
    capabilityGaps: [],
    warnings: [],
    relationships: { accepted: 0, review: 0, rejected: 0, skipped: 0 },
    enrichmentState: { resumedStages: [], completedStages: [], failedStages: [] },
    createdAt: input.createdAt ?? '2026-05-14T09:00:00.000Z',
  };
}
|
||||
|
||||
/**
 * Builds an `orders` table fixture with `id` and `status` columns and one
 * foreign key to `public.customers`.
 *
 * @param input.db - Schema of the table. Defaults to `public` only when the
 *   property is omitted; an explicit `null` is preserved so schema-less tables
 *   can be modelled.
 * @param input.estimatedRows - Row estimate. Defaults to `12` only when the
 *   property is omitted; an explicit `null` is preserved.
 */
function ordersTable(input: { db?: string | null; estimatedRows?: number | null } = {}): KtxSchemaTable {
  // `??` would turn an explicit null into the default, making the `| null`
  // half of the parameter type unreachable; only `undefined` means "use default".
  const db = input.db === undefined ? 'public' : input.db;
  const estimatedRows = input.estimatedRows === undefined ? 12 : input.estimatedRows;

  return {
    catalog: null,
    db,
    name: 'orders',
    kind: 'table',
    comment: 'Customer orders',
    estimatedRows,
    columns: [
      {
        name: 'id',
        nativeType: 'integer',
        normalizedType: 'integer',
        dimensionType: 'number',
        nullable: false,
        primaryKey: true,
        comment: 'Order id',
      },
      {
        name: 'status',
        nativeType: 'text',
        normalizedType: 'text',
        dimensionType: 'string',
        nullable: false,
        primaryKey: false,
        comment: 'Order status',
      },
    ],
    foreignKeys: [
      {
        fromColumn: 'customer_id',
        toCatalog: null,
        toDb: 'public',
        toTable: 'customers',
        toColumn: 'id',
        constraintName: 'orders_customer_id_fkey',
      },
    ],
  };
}
|
||||
|
||||
/**
 * Writes the artifacts of one live-database scan into the test project's file
 * store: `connection.json`, one JSON file per table under `tables/`, and
 * finally `scan-report.json`, all below the report's raw-sources directory.
 *
 * The connection id defaults to `warehouse` and the table list to a single
 * default `orders` table.
 */
async function seedScan(input: {
  connectionId?: string;
  syncId: string;
  runId: string;
  driver?: KtxConnectionDriver;
  extractedAt?: string;
  tables?: KtxSchemaTable[];
}): Promise<void> {
  const connectionId = input.connectionId ?? 'warehouse';
  const report = scanReport({
    connectionId,
    syncId: input.syncId,
    runId: input.runId,
    driver: input.driver,
    createdAt: input.extractedAt,
  });
  const root = report.artifactPaths.rawSourcesDir;

  // Every artifact is pretty-printed JSON committed with the same author identity.
  const commitJson = (path: string, payload: unknown, message: string) =>
    project.fileStore.writeFile(path, JSON.stringify(payload, null, 2), 'ktx', 'ktx@example.com', message);

  const connectionPayload = {
    connectionId,
    driver: report.driver,
    extractedAt: input.extractedAt ?? report.createdAt,
    scope: { schemas: ['public'] },
  };
  await commitJson(`${root}/connection.json`, connectionPayload, 'seed connection');

  const tables = input.tables ?? [ordersTable()];
  for (const table of tables) {
    const fileName = `${table.db ?? 'default'}-${table.name}.json`;
    await commitJson(`${root}/tables/${fileName}`, table, `seed ${table.name}`);
  }

  await commitJson(`${root}/scan-report.json`, report, 'seed scan report');
}
|
||||
|
||||
it('returns the latest scan snapshot table details for a display string', async () => {
|
||||
await seedScan({ syncId: 'sync-1', runId: 'scan-old', extractedAt: '2026-05-14T08:00:00.000Z' });
|
||||
await seedScan({
|
||||
syncId: 'sync-2',
|
||||
runId: 'scan-new',
|
||||
extractedAt: '2026-05-14T09:00:00.000Z',
|
||||
tables: [ordersTable({ estimatedRows: 99 })],
|
||||
});
|
||||
const service = createKtxEntityDetailsService(project);
|
||||
|
||||
const result = await service.read({
|
||||
connectionId: 'warehouse',
|
||||
entities: [{ table: 'public.orders' }],
|
||||
});
|
||||
|
||||
expect(result.results).toHaveLength(1);
|
||||
expect(result.results[0]).toMatchObject({
|
||||
ok: true,
|
||||
connectionId: 'warehouse',
|
||||
display: 'public.orders',
|
||||
estimatedRows: 99,
|
||||
snapshot: {
|
||||
syncId: 'sync-2',
|
||||
scanRunId: 'scan-new',
|
||||
extractedAt: '2026-05-14T09:00:00.000Z',
|
||||
},
|
||||
columns: [
|
||||
{ name: 'id', nativeType: 'integer', primaryKey: true },
|
||||
{ name: 'status', nativeType: 'text', nullable: false },
|
||||
],
|
||||
});
|
||||
});
|
||||
|
||||
it('filters requested columns while keeping full-table foreign keys', async () => {
|
||||
await seedScan({ syncId: 'sync-1', runId: 'scan-1' });
|
||||
const service = createKtxEntityDetailsService(project);
|
||||
|
||||
const result = await service.read({
|
||||
connectionId: 'warehouse',
|
||||
entities: [{ table: { catalog: null, db: 'public', name: 'orders' }, columns: ['status'] }],
|
||||
});
|
||||
|
||||
expect(result.results[0]).toMatchObject({
|
||||
ok: true,
|
||||
columns: [{ name: 'status' }],
|
||||
foreignKeys: [
|
||||
{
|
||||
fromColumn: 'customer_id',
|
||||
toDb: 'public',
|
||||
toTable: 'customers',
|
||||
toColumn: 'id',
|
||||
},
|
||||
],
|
||||
});
|
||||
});
|
||||
|
||||
it('returns a structured missing-scan error', async () => {
|
||||
const service = createKtxEntityDetailsService(project);
|
||||
|
||||
const result = await service.read({
|
||||
connectionId: 'warehouse',
|
||||
entities: [{ table: 'public.orders' }],
|
||||
});
|
||||
|
||||
expect(result.results).toEqual([
|
||||
{
|
||||
ok: false,
|
||||
connectionId: 'warehouse',
|
||||
table: 'public.orders',
|
||||
error: {
|
||||
code: 'scan_missing',
|
||||
message: 'No live-database scan found for connection "warehouse"; run `ktx ingest warehouse` or `ktx scan warehouse`.',
|
||||
},
|
||||
},
|
||||
]);
|
||||
});
|
||||
|
||||
it('reports ambiguous bare table names across schemas', async () => {
|
||||
await seedScan({
|
||||
syncId: 'sync-1',
|
||||
runId: 'scan-1',
|
||||
tables: [ordersTable({ db: 'public' }), ordersTable({ db: 'archive' })],
|
||||
});
|
||||
const service = createKtxEntityDetailsService(project);
|
||||
|
||||
const result = await service.read({
|
||||
connectionId: 'warehouse',
|
||||
entities: [{ table: 'orders' }],
|
||||
});
|
||||
|
||||
expect(result.results[0]).toMatchObject({
|
||||
ok: false,
|
||||
error: {
|
||||
code: 'ambiguous_table',
|
||||
candidates: [
|
||||
{ tableRef: { catalog: null, db: 'archive', name: 'orders' }, display: 'archive.orders' },
|
||||
{ tableRef: { catalog: null, db: 'public', name: 'orders' }, display: 'public.orders' },
|
||||
],
|
||||
},
|
||||
});
|
||||
});
|
||||
|
||||
it('reports missing requested columns with available column candidates', async () => {
|
||||
await seedScan({ syncId: 'sync-1', runId: 'scan-1' });
|
||||
const service = createKtxEntityDetailsService(project);
|
||||
|
||||
const result = await service.read({
|
||||
connectionId: 'warehouse',
|
||||
entities: [{ table: 'public.orders', columns: ['status', 'plan_tier'] }],
|
||||
});
|
||||
|
||||
expect(result.results[0]).toMatchObject({
|
||||
ok: false,
|
||||
error: {
|
||||
code: 'column_not_found',
|
||||
message: 'Column(s) not found on public.orders: plan_tier',
|
||||
candidates: ['id', 'status'],
|
||||
},
|
||||
});
|
||||
});
|
||||
});
|
||||
315
packages/context/src/scan/entity-details.ts
Normal file
315
packages/context/src/scan/entity-details.ts
Normal file
|
|
@ -0,0 +1,315 @@
|
|||
import type { KtxLocalProject } from '../project/index.js';
|
||||
import { readLocalScanStructuralSnapshot } from './local-structural-artifacts.js';
|
||||
import type {
|
||||
KtxConnectionDriver,
|
||||
KtxScanReport,
|
||||
KtxSchemaColumn,
|
||||
KtxSchemaSnapshot,
|
||||
KtxSchemaTable,
|
||||
KtxTableRef,
|
||||
} from './types.js';
|
||||
|
||||
export type KtxEntityDetailsTableInput = string | KtxTableRef;
|
||||
|
||||
/**
 * Request for table details read from the latest live-database scan of one
 * connection.
 */
export interface KtxEntityDetailsInput {
  /** Connection whose scan artifacts are consulted. */
  connectionId: string;
  /** Tables to look up; the service produces one result per entry. */
  entities: {
    /** Display string or structured reference identifying the table. */
    table: KtxEntityDetailsTableInput;
    /**
     * Optional column-name filter, matched case-insensitively. When omitted or
     * empty, every column of the table is returned.
     */
    columns?: string[];
  }[];
}
|
||||
|
||||
/** Identifies the scan snapshot a result was read from. */
export interface KtxEntityDetailsSnapshotInfo {
  /** Sync id taken from the scan report. */
  syncId: string;
  /** Run id taken from the scan report, or `null` when the report has none. */
  scanRunId: string | null;
  /** Extraction timestamp taken from the structural snapshot. */
  extractedAt: string;
}
|
||||
|
||||
/** Column description exposed in an entity-details record. */
export interface KtxEntityDetailsColumn {
  /** Column name as stored in the scan snapshot. */
  name: string;
  /** Free-text column comment, or `null` when there is none. */
  comment: string | null;
  /** Type name as reported by the source database. */
  nativeType: string;
  /** Normalized type name recorded by the scan. */
  normalizedType: string;
  /** Dimension classification, shared with the scan schema column type. */
  dimensionType: KtxSchemaColumn['dimensionType'];
  nullable: boolean;
  primaryKey: boolean;
}
|
||||
|
||||
/** Successful lookup result for one requested table. */
export interface KtxEntityDetailsRecord {
  /** Discriminator: always `true` for a successful lookup. */
  ok: true;
  connectionId: string;
  /** Scan snapshot the details were read from. */
  snapshot: KtxEntityDetailsSnapshotInfo;
  /** Structured reference of the resolved table. */
  tableRef: KtxTableRef;
  /** Driver-specific display name of the resolved table. */
  display: string;
  kind: KtxSchemaTable['kind'];
  comment: string | null;
  estimatedRows: number | null;
  /** Columns of the table, restricted to the requested ones when a filter was given. */
  columns: KtxEntityDetailsColumn[];
  /** All foreign keys of the table; the column filter does not apply here. */
  foreignKeys: KtxSchemaTable['foreignKeys'];
}
|
||||
|
||||
export type KtxEntityDetailsErrorCode = 'scan_missing' | 'table_not_found' | 'ambiguous_table' | 'column_not_found';
|
||||
|
||||
/** Failed lookup result for one requested table. */
export interface KtxEntityDetailsErrorResult {
  /** Discriminator: always `false` for a failed lookup. */
  ok: false;
  connectionId: string;
  /** The table input exactly as it was requested. */
  table: KtxEntityDetailsTableInput;
  /** Scan snapshot that was searched; absent when no scan exists for the connection. */
  snapshot?: KtxEntityDetailsSnapshotInfo;
  error: {
    code: KtxEntityDetailsErrorCode;
    message: string;
    /**
     * Suggestions for a retry: table candidates for table-resolution errors,
     * or the table's column names for `column_not_found`.
     */
    candidates?: { tableRef: KtxTableRef; display: string }[] | string[];
  };
}
|
||||
|
||||
/** Response of the entity-details service: one result per requested entity, in request order. */
export interface KtxEntityDetailsResponse {
  results: (KtxEntityDetailsRecord | KtxEntityDetailsErrorResult)[];
}
|
||||
|
||||
/** The most recent scan found for a connection: its structural snapshot plus the report it belongs to. */
interface LatestScan {
  /** Structural snapshot loaded from the scan's raw-sources directory. */
  snapshot: KtxSchemaSnapshot;
  /** Parsed contents of the scan's `scan-report.json`. */
  report: KtxScanReport;
}
|
||||
|
||||
/**
 * Outcome of resolving a table input against a snapshot: either the matching
 * table, or `table: null` together with the error to report.
 */
interface ResolveResult {
  table: KtxSchemaTable | null;
  /** Error payload in the same shape as the public error result. */
  error?: KtxEntityDetailsErrorResult['error'];
}
|
||||
|
||||
/** Lower-cases an identifier for case-insensitive comparison; `null` and `undefined` become the empty string. */
function normalize(value: string | null | undefined): string {
  if (value === null || value === undefined) {
    return '';
  }
  return value.toLowerCase();
}
|
||||
|
||||
/**
 * Compares two table references case-insensitively on catalog, db and name.
 * A missing (`null`) part is treated like an empty string.
 */
function refsEqual(left: KtxTableRef, right: KtxTableRef): boolean {
  const parts = ['catalog', 'db', 'name'] as const;
  return parts.every((part) => normalize(left[part]) === normalize(right[part]));
}
|
||||
|
||||
/**
 * Trims one identifier segment and strips a single leading quote character
 * (`"`, `'`, backtick or `[`) and a single trailing one (`"`, `'`, backtick or `]`).
 */
function cleanIdentifierPart(part: string): string {
  const trimmed = part.trim();
  return trimmed.replace(/^["'`\[]|["'`\]]$/g, '');
}
|
||||
|
||||
/**
 * Splits a dotted display string into cleaned identifier segments, dropping
 * segments that are empty after cleaning.
 */
function splitDisplay(display: string): string[] {
  const segments: string[] = [];
  for (const rawSegment of display.trim().split('.')) {
    const segment = cleanIdentifierPart(rawSegment);
    if (segment) {
      segments.push(segment);
    }
  }
  return segments;
}
|
||||
|
||||
/**
 * Renders a table reference as a display string: the bare table name for
 * sqlite, otherwise the non-empty parts of catalog, db and name joined by dots.
 */
function displayForTable(driver: KtxConnectionDriver, table: KtxTableRef): string {
  if (driver === 'sqlite') {
    return table.name;
  }
  const segments: string[] = [];
  for (const part of [table.catalog, table.db, table.name]) {
    if (part) {
      segments.push(part);
    }
  }
  return segments.join('.');
}
|
||||
|
||||
/** Extracts the catalog/db/name reference from a full schema table. */
function tableRef(table: KtxSchemaTable): KtxTableRef {
  const { catalog, db, name } = table;
  return { catalog, db, name };
}
|
||||
|
||||
/**
 * Turns tables into suggestion entries (structured ref plus display string),
 * ordered by display string using locale comparison.
 */
function candidateList(
  driver: KtxConnectionDriver,
  tables: KtxSchemaTable[],
): Array<{ tableRef: KtxTableRef; display: string }> {
  const candidates: Array<{ tableRef: KtxTableRef; display: string }> = [];
  for (const table of tables) {
    candidates.push({ tableRef: tableRef(table), display: displayForTable(driver, table) });
  }
  candidates.sort((a, b) => a.display.localeCompare(b.display));
  return candidates;
}
|
||||
|
||||
/**
 * Parses a display string into a structured table reference according to the
 * driver's naming depth.
 *
 * - sqlite accepts exactly one segment (table name).
 * - bigquery, snowflake and sqlserver accept exactly three segments.
 * - every other driver accepts two (db.name) or three (catalog.db.name).
 *
 * @returns the parsed reference, or `null` when the segment count does not fit the driver.
 */
function parseDisplayRef(driver: KtxConnectionDriver, display: string): KtxTableRef | null {
  const parts = splitDisplay(display);
  const [first, second, third] = parts;
  const isSqlite = driver === 'sqlite';
  const needsCatalog = driver === 'bigquery' || driver === 'snowflake' || driver === 'sqlserver';

  switch (parts.length) {
    case 1:
      return isSqlite ? { catalog: null, db: null, name: first! } : null;
    case 2:
      return isSqlite || needsCatalog ? null : { catalog: null, db: first!, name: second! };
    case 3:
      return isSqlite ? null : { catalog: first!, db: second!, name: third! };
    default:
      return null;
  }
}
|
||||
|
||||
/**
 * Resolves a requested table against the tables of a scan snapshot.
 *
 * Resolution order:
 * 1. A structured reference is matched case-insensitively on catalog/db/name.
 * 2. A display string that parses into a reference for the snapshot's driver
 *    is matched the same way.
 * 3. Anything else is treated as a bare table name and matched on name only.
 *    The name is compared both as given and with surrounding whitespace and
 *    quoting removed, so `"orders"` resolves like `orders`.
 *
 * @returns the matching table, or `table: null` with a `table_not_found`
 *   error (candidates: every table in the snapshot) or an `ambiguous_table`
 *   error (candidates: the tables sharing the bare name).
 */
function resolveTable(snapshot: KtxSchemaSnapshot, input: KtxEntityDetailsTableInput): ResolveResult {
  const notFound = (label: string): ResolveResult => ({
    table: null,
    error: {
      code: 'table_not_found',
      message: `Table not found in latest scan: ${label}`,
      candidates: candidateList(snapshot.driver, snapshot.tables),
    },
  });

  if (typeof input !== 'string') {
    const table = snapshot.tables.find((candidate) => refsEqual(candidate, input));
    return table ? { table } : notFound(displayForTable(snapshot.driver, input));
  }

  const parsed = parseDisplayRef(snapshot.driver, input);
  if (parsed) {
    const table = snapshot.tables.find((candidate) => refsEqual(candidate, parsed));
    return table ? { table } : notFound(input);
  }

  // Bare-name fallback. The raw input is still accepted so previously matching
  // names keep matching; the cleaned single segment additionally lets quoted
  // or padded bare names resolve, consistent with qualified display strings.
  const wantedNames = new Set([normalize(input)]);
  const segments = splitDisplay(input);
  if (segments.length === 1) {
    wantedNames.add(normalize(segments[0]));
  }
  const byName = snapshot.tables.filter((candidate) => wantedNames.has(normalize(candidate.name)));

  if (byName.length === 1) {
    return { table: byName[0]! };
  }
  if (byName.length > 1) {
    return {
      table: null,
      error: {
        code: 'ambiguous_table',
        message: `Table name "${input}" is ambiguous across schemas/catalogs; pass a structured table ref.`,
        candidates: candidateList(snapshot.driver, byName),
      },
    };
  }
  return notFound(input);
}
|
||||
|
||||
/** Projects a scan schema column onto the fields exposed in entity details, dropping everything else. */
function toColumn(column: KtxSchemaColumn): KtxEntityDetailsColumn {
  const { name, nativeType, normalizedType, dimensionType, nullable, primaryKey, comment } = column;
  return { name, nativeType, normalizedType, dimensionType, nullable, primaryKey, comment };
}
|
||||
|
||||
/**
 * Summarizes which scan a result came from: sync id and run id from the
 * report (run id falls back to `null`), extraction time from the snapshot.
 */
function snapshotInfo(report: KtxScanReport, snapshot: KtxSchemaSnapshot): KtxEntityDetailsSnapshotInfo {
  const { syncId, runId } = report;
  const { extractedAt } = snapshot;
  return { syncId, extractedAt, scanRunId: runId ?? null };
}
|
||||
|
||||
/**
 * Reads a file from the project's file store and parses its content as JSON.
 * The result is cast to `T` without validation; read and parse errors propagate.
 */
async function readJson<T>(project: KtxLocalProject, path: string): Promise<T> {
  const file = await project.fileStore.readFile(path);
  const parsed = JSON.parse(file.content);
  return parsed as T;
}
|
||||
|
||||
/**
 * Loads the latest live-database scan of a connection from the project's file
 * store.
 *
 * "Latest" is the `scan-report.json` whose path sorts last among all report
 * files below `raw-sources/<connectionId>/live-database`. The structural
 * snapshot is then read from the report's raw-sources directory (or, if the
 * report does not name one, the directory containing the report).
 *
 * @returns the report and snapshot, or `null` when listing the directory
 *   fails or no report file exists.
 */
async function latestScan(project: KtxLocalProject, connectionId: string): Promise<LatestScan | null> {
  const reportSuffix = '/scan-report.json';

  let listed;
  try {
    listed = await project.fileStore.listFiles(`raw-sources/${connectionId}/live-database`);
  } catch {
    // A failed listing is treated as "no scan yet" for this connection.
    return null;
  }

  // Pick the greatest path by plain string ordering, i.e. the last one a default sort would yield.
  let reportPath: string | undefined;
  for (const path of listed.files) {
    if (!path.endsWith(reportSuffix)) {
      continue;
    }
    if (reportPath === undefined || path > reportPath) {
      reportPath = path;
    }
  }
  if (reportPath === undefined) {
    return null;
  }

  const report = await readJson<KtxScanReport>(project, reportPath);
  const reportDir = reportPath.slice(0, -reportSuffix.length);
  const snapshot = await readLocalScanStructuralSnapshot({
    project,
    connectionId,
    driver: report.driver,
    rawSourcesDir: report.artifactPaths.rawSourcesDir ?? reportDir,
    extractedAtFallback: report.createdAt,
  });
  return { report, snapshot };
}
|
||||
|
||||
/**
 * Creates the entity-details service for a local project.
 *
 * The returned `read` method answers from the latest live-database scan of
 * the requested connection and never touches the database itself. It returns
 * one result per requested entity, in request order:
 *
 * - `scan_missing` for every entity when the connection has no scan;
 * - the table-resolution error when the table cannot be resolved;
 * - `column_not_found` when at least one requested column does not exist on
 *   the table (candidates list the table's column names);
 * - otherwise a record with the (optionally filtered) columns and the table's
 *   full foreign-key list.
 *
 * Column names are matched case-insensitively.
 */
export function createKtxEntityDetailsService(project: KtxLocalProject) {
  return {
    async read(input: KtxEntityDetailsInput): Promise<KtxEntityDetailsResponse> {
      const scan = await latestScan(project, input.connectionId);
      if (!scan) {
        return {
          results: input.entities.map((entity) => ({
            ok: false,
            connectionId: input.connectionId,
            table: entity.table,
            error: {
              code: 'scan_missing',
              message: `No live-database scan found for connection "${input.connectionId}"; run \`ktx ingest ${input.connectionId}\` or \`ktx scan ${input.connectionId}\`.`,
            },
          })),
        };
      }

      const info = snapshotInfo(scan.report, scan.snapshot);
      const results: KtxEntityDetailsResponse['results'] = [];
      for (const entity of input.entities) {
        const resolved = resolveTable(scan.snapshot, entity.table);
        if (!resolved.table) {
          results.push({
            ok: false,
            connectionId: input.connectionId,
            table: entity.table,
            snapshot: info,
            // resolveTable attaches an error to every result whose table is null.
            error: resolved.error!,
          });
          continue;
        }
        const table = resolved.table;

        const requested = new Set((entity.columns ?? []).map((column) => normalize(column)));
        const columns = requested.size
          ? table.columns.filter((column) => requested.has(normalize(column.name)))
          : table.columns;

        // Decide on the names themselves rather than on array lengths: a table
        // may hold several columns that differ only by case, which makes the
        // matched count diverge from the requested count even when nothing is
        // missing, or coincide with it even when something is.
        const found = new Set(columns.map((column) => normalize(column.name)));
        const missing = [...requested].filter((column) => !found.has(column));
        if (missing.length > 0) {
          results.push({
            ok: false,
            connectionId: input.connectionId,
            table: entity.table,
            snapshot: info,
            error: {
              code: 'column_not_found',
              message: `Column(s) not found on ${displayForTable(scan.snapshot.driver, table)}: ${missing.join(', ')}`,
              candidates: table.columns.map((column) => column.name),
            },
          });
          continue;
        }

        results.push({
          ok: true,
          connectionId: input.connectionId,
          tableRef: tableRef(table),
          display: displayForTable(scan.snapshot.driver, table),
          kind: table.kind,
          comment: table.comment,
          estimatedRows: table.estimatedRows,
          columns: columns.map(toColumn),
          foreignKeys: table.foreignKeys,
          snapshot: info,
        });
      }
      return { results };
    },
  };
}
|
||||
|
|
@ -60,6 +60,24 @@ export {
|
|||
ktxScanErrorMessage,
|
||||
skippedKtxScanEnrichmentSummary,
|
||||
} from './enrichment-summary.js';
|
||||
export type {
|
||||
KtxEntityDetailsColumn,
|
||||
KtxEntityDetailsErrorCode,
|
||||
KtxEntityDetailsErrorResult,
|
||||
KtxEntityDetailsInput,
|
||||
KtxEntityDetailsRecord,
|
||||
KtxEntityDetailsResponse,
|
||||
KtxEntityDetailsSnapshotInfo,
|
||||
KtxEntityDetailsTableInput,
|
||||
} from './entity-details.js';
|
||||
export { createKtxEntityDetailsService } from './entity-details.js';
|
||||
export type {
|
||||
DisplayTargetResolution,
|
||||
RawSchemaHit,
|
||||
TableDetail,
|
||||
WarehouseCatalogServiceDeps,
|
||||
} from './warehouse-catalog.js';
|
||||
export { WarehouseCatalogService } from './warehouse-catalog.js';
|
||||
export type {
|
||||
KtxColumnSampleUpdate,
|
||||
KtxDescriptionSource,
|
||||
|
|
|
|||
|
|
@ -2,8 +2,8 @@ import { mkdtemp, rm } from 'node:fs/promises';
|
|||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
||||
import { initKtxProject, type KtxLocalProject } from '../../../project/index.js';
|
||||
import { WarehouseCatalogService } from './warehouse-catalog.service.js';
|
||||
import { initKtxProject, type KtxLocalProject } from '../project/index.js';
|
||||
import { WarehouseCatalogService } from './warehouse-catalog.js';
|
||||
|
||||
describe('WarehouseCatalogService', () => {
|
||||
let tempDir: string;
|
||||
|
|
@ -18,8 +18,8 @@ describe('WarehouseCatalogService', () => {
|
|||
await rm(tempDir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
async function seedLiveDatabaseScan(connectionName = 'warehouse', syncId = 'sync-2', driver = 'postgres') {
|
||||
const root = `raw-sources/${connectionName}/live-database/${syncId}`;
|
||||
async function seedLiveDatabaseScan(connectionId = 'warehouse', syncId = 'sync-2', driver = 'postgres') {
|
||||
const root = `raw-sources/${connectionId}/live-database/${syncId}`;
|
||||
const tableRef = {
|
||||
catalog: driver === 'bigquery' ? 'analytics' : null,
|
||||
db: driver === 'sqlite' ? null : 'public',
|
||||
|
|
@ -27,7 +27,7 @@ describe('WarehouseCatalogService', () => {
|
|||
};
|
||||
await project.fileStore.writeFile(
|
||||
`${root}/connection.json`,
|
||||
JSON.stringify({ connectionId: connectionName, driver, extractedAt: '2026-05-12T00:00:00.000Z' }, null, 2),
|
||||
JSON.stringify({ connectionId, driver, extractedAt: '2026-05-12T00:00:00.000Z' }, null, 2),
|
||||
'ktx',
|
||||
'ktx@example.com',
|
||||
'seed connection',
|
||||
|
|
@ -75,7 +75,7 @@ describe('WarehouseCatalogService', () => {
|
|||
`${root}/enrichment/relationship-profile.json`,
|
||||
JSON.stringify(
|
||||
{
|
||||
connectionId: connectionName,
|
||||
connectionId,
|
||||
driver,
|
||||
sqlAvailable: true,
|
||||
queryCount: 3,
|
||||
|
|
@ -113,10 +113,10 @@ describe('WarehouseCatalogService', () => {
|
|||
const catalog = new WarehouseCatalogService({ fileStore: project.fileStore });
|
||||
|
||||
await expect(catalog.getLatestSyncId('warehouse')).resolves.toBe('sync-2');
|
||||
const detail = await catalog.getTable({ connectionName: 'warehouse', catalog: null, db: 'public', name: 'orders' });
|
||||
const detail = await catalog.getTable({ connectionId: 'warehouse', catalog: null, db: 'public', name: 'orders' });
|
||||
|
||||
expect(detail).toMatchObject({
|
||||
connectionName: 'warehouse',
|
||||
connectionId: 'warehouse',
|
||||
display: 'public.orders',
|
||||
rowCount: 12,
|
||||
columns: [
|
||||
|
|
@ -124,11 +124,20 @@ describe('WarehouseCatalogService', () => {
|
|||
{ name: 'status', nativeType: 'text', sampleValues: ['paid', 'refunded'], distinctCount: 2 },
|
||||
],
|
||||
});
|
||||
expect(detail).not.toHaveProperty(['connection', 'Name'].join(''));
|
||||
|
||||
const hits = await catalog.searchByName('warehouse', 'orders', 5);
|
||||
expect(hits[0]).toMatchObject({
|
||||
kind: 'table',
|
||||
connectionId: 'warehouse',
|
||||
display: 'public.orders',
|
||||
});
|
||||
expect(hits[0]).not.toHaveProperty(['connection', 'Name'].join(''));
|
||||
});
|
||||
|
||||
it('returns scanAvailable=false when no live-database scan exists', async () => {
|
||||
const catalog = new WarehouseCatalogService({ fileStore: project.fileStore });
|
||||
await expect(catalog.getTable({ connectionName: 'missing', catalog: null, db: 'public', name: 'orders' })).resolves.toBeNull();
|
||||
await expect(catalog.getTable({ connectionId: 'missing', catalog: null, db: 'public', name: 'orders' })).resolves.toBeNull();
|
||||
await expect(catalog.hasScan('missing')).resolves.toBe(false);
|
||||
});
|
||||
|
||||
|
|
@ -1,12 +1,12 @@
|
|||
import { getDialectForDriver } from '../../../connections/index.js';
|
||||
import type { KtxFileStorePort } from '../../../core/index.js';
|
||||
import { getDialectForDriver } from '../connections/index.js';
|
||||
import type { KtxFileStorePort } from '../core/index.js';
|
||||
import type {
|
||||
KtxConnectionDriver,
|
||||
KtxSchemaColumn,
|
||||
KtxSchemaForeignKey,
|
||||
KtxSchemaTable,
|
||||
KtxTableRef,
|
||||
} from '../../../scan/types.js';
|
||||
} from './types.js';
|
||||
|
||||
type CatalogDriver = KtxConnectionDriver | 'sqlite3';
|
||||
|
||||
|
|
@ -24,7 +24,7 @@ interface WarehouseColumnDetail extends KtxSchemaColumn {
|
|||
}
|
||||
|
||||
export interface TableDetail {
|
||||
connectionName: string;
|
||||
connectionId: string;
|
||||
catalog: string | null;
|
||||
db: string | null;
|
||||
name: string;
|
||||
|
|
@ -40,14 +40,14 @@ export interface TableDetail {
|
|||
export type RawSchemaHit =
|
||||
| {
|
||||
kind: 'table';
|
||||
connectionName: string;
|
||||
connectionId: string;
|
||||
ref: KtxTableRef;
|
||||
display: string;
|
||||
matchedOn: 'name' | 'db' | 'comment' | 'description';
|
||||
}
|
||||
| {
|
||||
kind: 'column';
|
||||
connectionName: string;
|
||||
connectionId: string;
|
||||
ref: KtxTableRef & { column: string };
|
||||
display: string;
|
||||
matchedOn: 'name' | 'comment' | 'description';
|
||||
|
|
@ -80,7 +80,7 @@ interface RelationshipProfileArtifact {
|
|||
}
|
||||
|
||||
interface ConnectionCatalog {
|
||||
connectionName: string;
|
||||
connectionId: string;
|
||||
syncId: string;
|
||||
driver: CatalogDriver;
|
||||
tables: KtxSchemaTable[];
|
||||
|
|
@ -250,21 +250,21 @@ export class WarehouseCatalogService {
|
|||
|
||||
constructor(private readonly deps: WarehouseCatalogServiceDeps) {}
|
||||
|
||||
async hasScan(connectionName: string): Promise<boolean> {
|
||||
return (await this.loadCatalog(connectionName)) !== null;
|
||||
async hasScan(connectionId: string): Promise<boolean> {
|
||||
return (await this.loadCatalog(connectionId)) !== null;
|
||||
}
|
||||
|
||||
async getLatestSyncId(connectionName: string): Promise<string | null> {
|
||||
return (await this.loadCatalog(connectionName))?.syncId ?? null;
|
||||
async getLatestSyncId(connectionId: string): Promise<string | null> {
|
||||
return (await this.loadCatalog(connectionId))?.syncId ?? null;
|
||||
}
|
||||
|
||||
async listTables(connectionName: string): Promise<KtxTableRef[]> {
|
||||
const catalog = await this.loadCatalog(connectionName);
|
||||
async listTables(connectionId: string): Promise<KtxTableRef[]> {
|
||||
const catalog = await this.loadCatalog(connectionId);
|
||||
return catalog?.tables.map((table) => ({ catalog: table.catalog, db: table.db, name: table.name })) ?? [];
|
||||
}
|
||||
|
||||
async getTable(ref: { connectionName: string } & KtxTableRef): Promise<TableDetail | null> {
|
||||
const catalog = await this.loadCatalog(ref.connectionName);
|
||||
async getTable(ref: { connectionId: string } & KtxTableRef): Promise<TableDetail | null> {
|
||||
const catalog = await this.loadCatalog(ref.connectionId);
|
||||
if (!catalog) {
|
||||
return null;
|
||||
}
|
||||
|
|
@ -277,7 +277,7 @@ export class WarehouseCatalogService {
|
|||
const profileColumns = catalog.profile?.columns ?? {};
|
||||
|
||||
return {
|
||||
connectionName: ref.connectionName,
|
||||
connectionId: ref.connectionId,
|
||||
catalog: table.catalog,
|
||||
db: table.db,
|
||||
name: table.name,
|
||||
|
|
@ -310,14 +310,14 @@ export class WarehouseCatalogService {
|
|||
}
|
||||
|
||||
async resolveDisplay(
|
||||
connectionName: string,
|
||||
connectionId: string,
|
||||
display: string,
|
||||
): Promise<{
|
||||
resolved: KtxTableRef | null;
|
||||
candidates: KtxTableRef[];
|
||||
dialect: string;
|
||||
}> {
|
||||
const catalog = await this.loadCatalog(connectionName);
|
||||
const catalog = await this.loadCatalog(connectionId);
|
||||
if (!catalog) {
|
||||
return { resolved: null, candidates: [], dialect: 'unknown' };
|
||||
}
|
||||
|
|
@ -333,14 +333,14 @@ export class WarehouseCatalogService {
|
|||
return { resolved: { catalog: table.catalog, db: table.db, name: table.name }, candidates: [], dialect };
|
||||
}
|
||||
|
||||
async resolveDisplayTarget(connectionName: string, display: string): Promise<DisplayTargetResolution> {
|
||||
const catalog = await this.loadCatalog(connectionName);
|
||||
async resolveDisplayTarget(connectionId: string, display: string): Promise<DisplayTargetResolution> {
|
||||
const catalog = await this.loadCatalog(connectionId);
|
||||
if (!catalog) {
|
||||
return { resolved: null, candidates: [], dialect: 'unknown' };
|
||||
}
|
||||
|
||||
const dialect = getDialectForDriver(catalog.driver).type;
|
||||
const tableResolution = await this.resolveDisplay(connectionName, display);
|
||||
const tableResolution = await this.resolveDisplay(connectionId, display);
|
||||
if (tableResolution.resolved) {
|
||||
return tableResolution;
|
||||
}
|
||||
|
|
@ -367,8 +367,8 @@ export class WarehouseCatalogService {
|
|||
};
|
||||
}
|
||||
|
||||
async searchByName(connectionName: string, query: string, limit: number): Promise<RawSchemaHit[]> {
|
||||
const catalog = await this.loadCatalog(connectionName);
|
||||
async searchByName(connectionId: string, query: string, limit: number): Promise<RawSchemaHit[]> {
|
||||
const catalog = await this.loadCatalog(connectionId);
|
||||
if (!catalog) {
|
||||
return [];
|
||||
}
|
||||
|
|
@ -378,7 +378,7 @@ export class WarehouseCatalogService {
|
|||
if (tableMatch) {
|
||||
hits.push({
|
||||
kind: 'table',
|
||||
connectionName,
|
||||
connectionId,
|
||||
ref: { catalog: table.catalog, db: table.db, name: table.name },
|
||||
display: formatDisplay(catalog.driver, table),
|
||||
matchedOn: tableMatch,
|
||||
|
|
@ -391,7 +391,7 @@ export class WarehouseCatalogService {
|
|||
}
|
||||
hits.push({
|
||||
kind: 'column',
|
||||
connectionName,
|
||||
connectionId,
|
||||
ref: { catalog: table.catalog, db: table.db, name: table.name, column: column.name },
|
||||
display: `${formatDisplay(catalog.driver, table)}.${column.name}`,
|
||||
matchedOn: columnMatch,
|
||||
|
|
@ -401,18 +401,18 @@ export class WarehouseCatalogService {
|
|||
return hits.slice(0, Math.max(0, limit));
|
||||
}
|
||||
|
||||
private loadCatalog(connectionName: string): Promise<ConnectionCatalog | null> {
|
||||
const existing = this.catalogs.get(connectionName);
|
||||
private loadCatalog(connectionId: string): Promise<ConnectionCatalog | null> {
|
||||
const existing = this.catalogs.get(connectionId);
|
||||
if (existing) {
|
||||
return existing;
|
||||
}
|
||||
const pending = this.readCatalog(connectionName);
|
||||
this.catalogs.set(connectionName, pending);
|
||||
const pending = this.readCatalog(connectionId);
|
||||
this.catalogs.set(connectionId, pending);
|
||||
return pending;
|
||||
}
|
||||
|
||||
private async readCatalog(connectionName: string): Promise<ConnectionCatalog | null> {
|
||||
const root = `raw-sources/${connectionName}/live-database`;
|
||||
private async readCatalog(connectionId: string): Promise<ConnectionCatalog | null> {
|
||||
const root = `raw-sources/${connectionId}/live-database`;
|
||||
const listed = await this.deps.fileStore.listFiles(root);
|
||||
const connectionFiles = listed.files.filter((file) => file.endsWith('/connection.json')).sort();
|
||||
const latestConnectionPath = connectionFiles.at(-1);
|
||||
|
|
@ -438,7 +438,7 @@ export class WarehouseCatalogService {
|
|||
}
|
||||
|
||||
return {
|
||||
connectionName,
|
||||
connectionId,
|
||||
syncId,
|
||||
driver: connection.driver ?? profile?.driver ?? 'postgres',
|
||||
tables,
|
||||
264
packages/context/src/search/discover.test.ts
Normal file
264
packages/context/src/search/discover.test.ts
Normal file
|
|
@ -0,0 +1,264 @@
|
|||
import { mkdtemp, rm } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
||||
import { initKtxProject, type KtxLocalProject } from '../project/index.js';
|
||||
import { writeLocalKnowledgePage } from '../wiki/local-knowledge.js';
|
||||
import { createKtxDiscoverDataService } from './discover.js';
|
||||
|
||||
describe('createKtxDiscoverDataService', () => {
|
||||
let tempDir: string;
|
||||
let project: KtxLocalProject;
|
||||
|
||||
beforeEach(async () => {
|
||||
tempDir = await mkdtemp(join(tmpdir(), 'ktx-discover-data-'));
|
||||
project = await initKtxProject({ projectDir: join(tempDir, 'project') });
|
||||
project.config.connections.warehouse = { driver: 'postgres', url: 'env:DATABASE_URL' };
|
||||
project.config.connections.billing = { driver: 'postgres', url: 'env:BILLING_DATABASE_URL' };
|
||||
});
|
||||
|
||||
afterEach(async () => {
|
||||
await rm(tempDir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
// Writes one global wiki page whose summary and body mention paid orders.
async function seedWiki(): Promise<void> {
  const page = {
    key: 'orders-playbook',
    scope: 'GLOBAL',
    summary: 'Paid order operations',
    content: 'Use paid orders and order_count to inspect monthly customer activity for Acme Corp.',
    tags: ['orders'],
  } as const;
  await writeLocalKnowledgePage(project, { ...page, tags: [...page.tags] });
}
|
||||
|
||||
// Writes a semantic-layer source file for the `warehouse` connection.
// NOTE(review): the leading whitespace inside these YAML lines is reproduced
// exactly as it appears here; it looks collapsed — confirm against upstream.
async function seedSl(): Promise<void> {
  const yamlLines: string[] = [
    'name: orders',
    'descriptions:',
    ' user: Paid order facts',
    'table: public.orders',
    'grain: [id]',
    'columns:',
    ' - name: status',
    ' type: string',
    ' descriptions:',
    ' user: Payment status for the order',
    ' - name: ordered_at',
    ' type: time',
    'measures:',
    ' - name: order_count',
    ' expr: count(*)',
    ' description: Number of paid orders',
    '',
  ];
  const yaml = yamlLines.join('\n');
  await project.fileStore.writeFile(
    'semantic-layer/warehouse/orders.yaml',
    yaml,
    'ktx',
    'ktx@example.com',
    'seed sl source',
  );
}
|
||||
|
||||
// Writes a minimal scan snapshot (connection.json, one table file and
// scan-report.json) under raw-sources/<connectionId>/live-database/<syncId>.
// Defaults: connection `warehouse`, table `orders`.
async function seedScan(input: {
  connectionId?: string;
  syncId: string;
  tableName?: string;
  comment?: string;
  sampleValues?: string[];
}): Promise<void> {
  const connectionId = input.connectionId ?? 'warehouse';
  const tableName = input.tableName ?? 'orders';
  const tableComment = input.comment ?? 'Orders table from warehouse';
  const root = `raw-sources/${connectionId}/live-database/${input.syncId}`;
  const reportPath = `${root}/scan-report.json`;

  // Serialises `payload` with 2-space indentation and commits it as the seed author.
  const writeJson = (path: string, payload: unknown, message: string) =>
    project.fileStore.writeFile(path, JSON.stringify(payload, null, 2), 'ktx', 'ktx@example.com', message);

  const connectionDoc = {
    connectionId,
    driver: 'postgres',
    extractedAt: '2026-05-14T09:00:00.000Z',
    scope: { schemas: ['public'] },
  };
  await writeJson(`${root}/connection.json`, connectionDoc, 'seed scan connection');

  const idColumn = {
    name: 'id',
    nativeType: 'integer',
    normalizedType: 'integer',
    dimensionType: 'number',
    nullable: false,
    primaryKey: true,
    comment: 'Order id',
  };
  const statusColumn = {
    name: 'status',
    nativeType: 'text',
    normalizedType: 'text',
    dimensionType: 'string',
    nullable: false,
    primaryKey: false,
    comment: 'Order status',
    sampleValues: input.sampleValues ?? ['paid', 'pending'],
  };
  const tableDoc = {
    catalog: null,
    db: 'public',
    name: tableName,
    kind: 'table',
    comment: tableComment,
    estimatedRows: 123,
    descriptions: { db: tableComment },
    columns: [idColumn, statusColumn],
    foreignKeys: [],
  };
  await writeJson(`${root}/tables/public-${tableName}.json`, tableDoc, 'seed table');

  const reportDoc = {
    connectionId,
    driver: 'postgres',
    syncId: input.syncId,
    runId: `scan-${input.syncId}`,
    trigger: 'mcp',
    mode: 'enriched',
    dryRun: false,
    artifactPaths: {
      rawSourcesDir: root,
      reportPath,
      manifestShards: [],
      enrichmentArtifacts: [],
    },
    diffSummary: {
      tablesAdded: 1,
      tablesModified: 0,
      tablesDeleted: 0,
      tablesUnchanged: 0,
      columnsAdded: 0,
      columnsModified: 0,
      columnsDeleted: 0,
    },
    manifestShardsWritten: 0,
    structuralSyncStats: {
      tablesCreated: 0,
      tablesUpdated: 0,
      tablesDeleted: 0,
      columnsCreated: 0,
      columnsUpdated: 0,
      columnsDeleted: 0,
    },
    enrichment: {
      dataDictionary: 'completed',
      tableDescriptions: 'completed',
      columnDescriptions: 'completed',
      embeddings: 'skipped',
      deterministicRelationships: 'skipped',
      llmRelationshipValidation: 'skipped',
      statisticalValidation: 'skipped',
    },
    capabilityGaps: [],
    warnings: [],
    relationships: { accepted: 0, review: 0, rejected: 0, skipped: 0 },
    enrichmentState: { resumedStages: [], completedStages: [], failedStages: [] },
    createdAt: '2026-05-14T09:00:00.000Z',
  };
  await writeJson(reportPath, reportDoc, 'seed scan report');
}
|
||||
|
||||
it('returns unified ranked refs across wiki, semantic-layer, and raw schema', async () => {
  // Seed all three lanes so every kind can appear in one result set.
  await seedWiki();
  await seedSl();
  await seedScan({ syncId: 'sync-1', sampleValues: ['paid', 'refunded'] });

  const discover = createKtxDiscoverDataService(project, { userId: 'local-user' });
  const refs = await discover.search({ query: 'paid orders', connectionId: 'warehouse', limit: 10 });

  const kindsSeen = refs.map((ref) => ref.kind);
  expect(kindsSeen).toEqual(
    expect.arrayContaining(['wiki', 'sl_source', 'sl_measure', 'sl_dimension', 'table', 'column']),
  );

  // Scores are normalised and snippets are bounded.
  const scoresInRange = refs.every((ref) => ref.score >= 0 && ref.score <= 1);
  expect(scoresInRange).toBe(true);
  const snippetsBounded = refs.every((ref) => ref.snippet === null || ref.snippet.length <= 200);
  expect(snippetsBounded).toBe(true);

  const expectedTable = expect.objectContaining({
    kind: 'table',
    id: 'public.orders',
    connectionId: 'warehouse',
    tableRef: { catalog: null, db: 'public', name: 'orders' },
    matchedOn: expect.stringMatching(/name|description|comment|display/),
  });
  expect(refs).toContainEqual(expectedTable);

  const expectedColumn = expect.objectContaining({
    kind: 'column',
    id: 'public.orders.status',
    connectionId: 'warehouse',
    columnName: 'status',
    matchedOn: expect.stringMatching(/name|comment|description|sample_value/),
  });
  expect(refs).toContainEqual(expectedColumn);

  const expectedMeasure = expect.objectContaining({
    kind: 'sl_measure',
    id: 'orders.order_count',
    connectionId: 'warehouse',
    summary: 'Number of paid orders',
    snippet: 'count(*)',
    matchedOn: expect.stringMatching(/name|description|expr/),
  });
  expect(refs).toContainEqual(expectedMeasure);
});
|
||||
|
||||
it('honors kind filters and connection scope', async () => {
  await seedWiki();
  await seedSl();
  await seedScan({ syncId: 'sync-1', connectionId: 'warehouse', tableName: 'orders' });
  await seedScan({ syncId: 'sync-2', connectionId: 'billing', tableName: 'invoices', comment: 'Billing invoices' });

  const discover = createKtxDiscoverDataService(project);
  const request = {
    query: 'orders',
    connectionId: 'warehouse',
    kinds: ['table' as const, 'column' as const],
    limit: 10,
  };
  const refs = await discover.search(request);

  // Only raw-schema kinds from the requested connection may come back.
  const onlyRawKinds = refs.every((ref) => ref.kind === 'table' || ref.kind === 'column');
  const onlyWarehouse = refs.every((ref) => ref.connectionId === 'warehouse');
  const leaksBilling = refs.some((ref) => ref.id.includes('invoices'));
  const leaksWiki = refs.some((ref) => ref.kind === 'wiki');

  expect(onlyRawKinds).toBe(true);
  expect(onlyWarehouse).toBe(true);
  expect(leaksBilling).toBe(false);
  expect(leaksWiki).toBe(false);
});
|
||||
|
||||
it('re-reads the latest scan artifacts on each call', async () => {
  await seedScan({ syncId: 'sync-1', tableName: 'orders', comment: 'Old orders table' });
  const discover = createKtxDiscoverDataService(project);

  const firstSearch = discover.search({ query: 'orders', connectionId: 'warehouse', kinds: ['table'], limit: 10 });
  await expect(firstSearch).resolves.toEqual(
    expect.arrayContaining([expect.objectContaining({ id: 'public.orders' })]),
  );

  // A newer sync on the same service instance must replace the earlier snapshot.
  await seedScan({ syncId: 'sync-2', tableName: 'invoices', comment: 'Invoice facts' });
  const afterRescan = await discover.search({
    query: 'invoice',
    connectionId: 'warehouse',
    kinds: ['table'],
    limit: 10,
  });

  expect(afterRescan).toEqual(expect.arrayContaining([expect.objectContaining({ id: 'public.invoices' })]));
  const stillHasOrders = afterRescan.some((ref) => ref.id === 'public.orders');
  expect(stillHasOrders).toBe(false);
});
|
||||
});
|
||||
466
packages/context/src/search/discover.ts
Normal file
466
packages/context/src/search/discover.ts
Normal file
|
|
@ -0,0 +1,466 @@
|
|||
import type { KtxEmbeddingPort } from '../core/index.js';
|
||||
import type { KtxLocalProject } from '../project/index.js';
|
||||
import type { KtxScanReport, KtxSchemaColumn, KtxSchemaTable, KtxTableRef } from '../scan/index.js';
|
||||
import { DEFAULT_PRIORITY, loadLocalSlSourceRecords, resolveDescription } from '../sl/index.js';
|
||||
import { readLocalKnowledgePage, searchLocalKnowledgePages } from '../wiki/local-knowledge.js';
|
||||
import { HybridSearchCore, type FusedSearchCandidate, type SearchCandidateGenerator } from './index.js';
|
||||
|
||||
/** Categories of artifact a discovery search can return. */
export type KtxDiscoverDataKind =
  | 'wiki'
  | 'sl_source'
  | 'sl_measure'
  | 'sl_dimension'
  | 'table'
  | 'column';

/** The field of an artifact that produced the reported match. */
export type KtxDiscoverDataMatchedOn =
  | 'name'
  | 'display'
  | 'description'
  | 'comment'
  | 'expr'
  | 'sample_value'
  | 'body';

/** Arguments accepted by the discovery service's `search` method. */
export interface KtxDiscoverDataInput {
  /** Free-text query; a blank query yields no results. */
  query: string;
  /** Restricts connection-bound lanes to one connection; otherwise all configured connections are used. */
  connectionId?: string;
  /** Kinds to include; all kinds when omitted. */
  kinds?: KtxDiscoverDataKind[];
  /** Maximum results; the service defaults to 15 and clamps to 1..50. */
  limit?: number;
}

/** One ranked discovery result. */
export interface KtxDiscoverDataRef {
  kind: KtxDiscoverDataKind;
  id: string;
  /** Fused score divided by the best fused score of the response. */
  score: number;
  summary: string | null;
  snippet: string | null;
  matchedOn: KtxDiscoverDataMatchedOn;
  /** Set for semantic-layer and raw-schema results; absent on wiki results. */
  connectionId?: string;
  /** Set for `table` and `column` results. */
  tableRef?: KtxTableRef;
  /** Set for `column` results. */
  columnName?: string;
}

/** Payload returned by `search`: results in ranked order. */
export type KtxDiscoverDataResponse = KtxDiscoverDataRef[];

/** Optional collaborators forwarded to the wiki lane. */
export interface KtxDiscoverDataServiceOptions {
  userId?: string;
  embeddingService?: KtxEmbeddingPort | null;
}
|
||||
|
||||
/** A result without its final score, paired with the lane-local score used for ranking. */
interface CandidateRecord {
  ref: Omit<KtxDiscoverDataRef, 'score'>;
  rankScore: number;
}

/** Column shape as read from a scan's table file, with optional enrichment fields. */
type RawColumn = KtxSchemaColumn & {
  descriptions?: Record<string, string>;
  sampleValues?: unknown[];
};

/** Table shape as read from a scan's table file. */
type RawTable = KtxSchemaTable & {
  descriptions?: Record<string, string>;
  columns: RawColumn[];
};

/** The most recent scan found for a connection, with its table files loaded. */
interface LatestScan {
  report: KtxScanReport;
  rawSourcesDir: string;
  tables: RawTable[];
}

/** Kinds searched when the caller does not pass `kinds`. */
const ALL_KINDS: KtxDiscoverDataKind[] = [
  'wiki',
  'sl_source',
  'sl_measure',
  'sl_dimension',
  'table',
  'column',
];
|
||||
|
||||
/** Lower-cases `value`, mapping `null` and `undefined` to the empty string. */
function normalize(value: string | null | undefined): string {
  if (value === null || value === undefined) {
    return '';
  }
  return value.toLowerCase();
}
|
||||
|
||||
/**
 * Splits a query into lower-cased search terms.
 *
 * A term is a maximal run of letters, combining marks, digits or underscores
 * in any script. The previous ASCII-only class dropped non-Latin queries
 * entirely and cut accented words apart. ASCII input tokenises as before.
 *
 * @param query Raw user query.
 * @returns Terms in order of appearance; empty when the query has none.
 */
function queryTerms(query: string): string[] {
  return query
    .toLowerCase()
    .split(/[^\p{L}\p{M}\p{N}_]+/u)
    // Leading or trailing separators leave empty strings behind.
    .filter((term) => term.length > 0);
}
|
||||
|
||||
/** Reports whether `kind` is among the requested `kinds`. */
function hasKind(kinds: ReadonlySet<KtxDiscoverDataKind>, kind: KtxDiscoverDataKind): boolean {
  const requested = kinds.has(kind);
  return requested;
}
|
||||
|
||||
/**
 * Collapses whitespace runs to single spaces, trims, and limits the result to
 * 200 UTF-16 code units.
 *
 * @param value Text to compact; may be absent.
 * @returns The compacted text, or `null` when nothing remains. Whitespace-only
 *   input also yields `null` (it used to yield `''`).
 */
function cap200(value: string | null | undefined): string | null {
  if (!value) {
    return null;
  }
  const compact = value.replace(/\s+/g, ' ').trim();
  if (compact.length === 0) {
    return null;
  }
  if (compact.length <= 200) {
    return compact;
  }
  // Do not cut between the two halves of a surrogate pair: if the last kept
  // unit would be a high surrogate, drop it as well.
  const lastKept = compact.charCodeAt(199);
  const end = lastKept >= 0xd800 && lastKept <= 0xdbff ? 199 : 200;
  return compact.slice(0, end);
}
|
||||
|
||||
/**
 * Extracts a window of `text` around the earliest occurrence of any term:
 * up to 60 characters before it and 140 from it onwards. When no term occurs
 * the window starts at the beginning. The window is then passed through
 * `cap200`.
 *
 * @returns The snippet, or `null` when `text` is empty or absent.
 */
function snippetAround(text: string | null | undefined, terms: readonly string[]): string | null {
  if (!text) {
    return null;
  }
  const haystack = text.toLowerCase();
  let earliest = -1;
  for (const term of terms) {
    const position = haystack.indexOf(term);
    if (position >= 0 && (earliest < 0 || position < earliest)) {
      earliest = position;
    }
  }
  const anchor = earliest < 0 ? 0 : earliest;
  const from = Math.max(0, anchor - 60);
  const to = anchor + 140;
  return cap200(text.slice(from, to));
}
|
||||
|
||||
/**
 * Fraction of `terms` that occur as substrings of the lower-cased `value`.
 *
 * @returns A number in 0..1; 0 when the text is empty or there are no terms.
 */
function textScore(value: string | null | undefined, terms: readonly string[]): number {
  const haystack = normalize(value);
  if (haystack.length === 0 || terms.length === 0) {
    return 0;
  }
  let hits = 0;
  for (const term of terms) {
    if (haystack.includes(term)) {
      hits += 1;
    }
  }
  return hits / terms.length;
}
|
||||
|
||||
/**
 * Scores every field with `textScore` multiplied by its weight and returns the
 * winner. Ties on score go to the field whose `matchedOn` sorts first under
 * `localeCompare`; full ties keep the earlier field.
 *
 * @returns The best field, or `null` when no field scores above zero.
 */
function bestField(
  fields: Array<{ matchedOn: KtxDiscoverDataMatchedOn; text: string | null | undefined; weight: number }>,
  terms: readonly string[],
): { matchedOn: KtxDiscoverDataMatchedOn; score: number; text: string | null } | null {
  let winner: { matchedOn: KtxDiscoverDataMatchedOn; score: number; text: string | null } | null = null;
  for (const field of fields) {
    const score = textScore(field.text, terms) * field.weight;
    if (!(score > 0)) {
      continue;
    }
    const beatsWinner =
      winner === null ||
      score > winner.score ||
      (score === winner.score && field.matchedOn.localeCompare(winner.matchedOn) < 0);
    if (beatsWinner) {
      winner = { matchedOn: field.matchedOn, score, text: field.text ?? null };
    }
  }
  return winner;
}
|
||||
|
||||
/** Joins the non-empty parts of a table reference (catalog, db, name) with dots. */
function displayForTable(table: KtxTableRef): string {
  const parts: string[] = [];
  for (const part of [table.catalog, table.db, table.name]) {
    if (part) {
      parts.push(part);
    }
  }
  return parts.join('.');
}
|
||||
|
||||
/** Reduces a scanned table to its identifying catalog/db/name triple. */
function tableRef(table: KtxSchemaTable): KtxTableRef {
  const { catalog, db, name } = table;
  return { catalog, db, name };
}
|
||||
|
||||
/**
 * Reads `path` from the project's file store and parses its content as JSON.
 * The result is asserted to be `T`; no runtime validation is performed.
 */
async function readJson<T>(project: KtxLocalProject, path: string): Promise<T> {
  const file = await project.fileStore.readFile(path);
  const parsed = JSON.parse(file.content) as T;
  return parsed;
}
|
||||
|
||||
/**
 * Loads the newest scan for a connection from
 * `raw-sources/<connectionId>/live-database`.
 *
 * "Newest" is the `scan-report.json` whose path is lexicographically greatest.
 * NOTE(review): this presumes sync ids sort chronologically as strings — confirm.
 *
 * @returns The report, its raw-sources directory and every table file under
 *   `<dir>/tables` in path order; `null` when the root cannot be listed or
 *   holds no report.
 */
async function latestScan(project: KtxLocalProject, connectionId: string): Promise<LatestScan | null> {
  const reportSuffix = '/scan-report.json';
  let files: string[];
  try {
    const listing = await project.fileStore.listFiles(`raw-sources/${connectionId}/live-database`);
    files = listing.files;
  } catch {
    return null;
  }

  let reportPath: string | undefined;
  for (const candidate of files) {
    if (!candidate.endsWith(reportSuffix)) {
      continue;
    }
    if (reportPath === undefined || candidate > reportPath) {
      reportPath = candidate;
    }
  }
  if (reportPath === undefined) {
    return null;
  }

  const report = await readJson<KtxScanReport>(project, reportPath);
  // Fall back to the report's own directory when the report does not name one.
  const rawSourcesDir =
    report.artifactPaths.rawSourcesDir ?? reportPath.slice(0, reportPath.length - reportSuffix.length);

  const tableListing = await project.fileStore.listFiles(`${rawSourcesDir}/tables`);
  const tablePaths = tableListing.files.filter((file) => file.endsWith('.json')).sort();
  const tables: RawTable[] = [];
  for (const tablePath of tablePaths) {
    const table = await readJson<RawTable>(project, tablePath);
    tables.push(table);
  }
  return { report, rawSourcesDir, tables };
}
|
||||
|
||||
/**
 * Connection ids to search: only `connectionId` when one is given, otherwise
 * every connection in the project config, sorted.
 */
function configuredConnectionIds(project: KtxLocalProject, connectionId?: string): string[] {
  if (connectionId) {
    return [connectionId];
  }
  const ids = Object.keys(project.config.connections);
  ids.sort();
  return ids;
}
|
||||
|
||||
/**
 * Builds wiki-lane candidates.
 *
 * Runs the local knowledge-page search (asking for at least 25 hits), then
 * reads each page so its body can be matched and excerpted. A candidate's rank
 * score is the search score plus the best field score over key, summary and body.
 *
 * @returns Candidates sorted by rank score descending, then by id.
 */
async function wikiCandidates(
  project: KtxLocalProject,
  input: KtxDiscoverDataInput,
  options: KtxDiscoverDataServiceOptions,
  terms: readonly string[],
): Promise<CandidateRecord[]> {
  const poolSize = Math.max(input.limit ?? 15, 25);
  const hits = await searchLocalKnowledgePages(project, {
    query: input.query,
    userId: options.userId,
    embeddingService: options.embeddingService ?? null,
    limit: poolSize,
  });

  const found: CandidateRecord[] = [];
  for (const hit of hits) {
    const page = await readLocalKnowledgePage(project, { key: hit.key, userId: options.userId });
    const body = page?.content ?? '';
    const best = bestField(
      [
        { matchedOn: 'name', text: hit.key, weight: 1.1 },
        { matchedOn: 'description', text: hit.summary, weight: 1 },
        { matchedOn: 'body', text: body, weight: 0.8 },
      ],
      terms,
    );
    const fieldScore = best?.score ?? 0;
    found.push({
      rankScore: hit.score + fieldScore,
      ref: {
        kind: 'wiki',
        id: hit.key,
        summary: hit.summary || null,
        snippet: snippetAround(body, terms),
        // A hit with no lexical match is attributed to the body.
        matchedOn: best?.matchedOn ?? 'body',
      },
    });
  }

  found.sort((a, b) => b.rankScore - a.rankScore || a.ref.id.localeCompare(b.ref.id));
  return found;
}
|
||||
|
||||
/**
 * Builds semantic-layer candidates (sources, measures, dimensions) for the
 * requested kinds across the connections in scope.
 *
 * A connection whose sources fail to load contributes nothing. Entries are
 * kept only when at least one of their fields matches a term.
 *
 * @returns Candidates sorted by rank score descending, then by id.
 */
async function slCandidates(
  project: KtxLocalProject,
  input: KtxDiscoverDataInput,
  kinds: ReadonlySet<KtxDiscoverDataKind>,
  terms: readonly string[],
): Promise<CandidateRecord[]> {
  const wantSources = hasKind(kinds, 'sl_source');
  const wantMeasures = hasKind(kinds, 'sl_measure');
  const wantDimensions = hasKind(kinds, 'sl_dimension');
  const found: CandidateRecord[] = [];

  for (const connectionId of configuredConnectionIds(project, input.connectionId)) {
    const sourceRecords = await loadLocalSlSourceRecords(project, { connectionId }).catch(() => []);

    for (const { source } of sourceRecords) {
      if (wantSources) {
        const sourceDescription = resolveDescription(source.descriptions, { priority: DEFAULT_PRIORITY });
        const hit = bestField(
          [
            { matchedOn: 'name', text: source.name, weight: 1.2 },
            { matchedOn: 'description', text: sourceDescription, weight: 1 },
            { matchedOn: 'display', text: source.table ?? source.sql ?? null, weight: 0.8 },
          ],
          terms,
        );
        if (hit) {
          let snippet: string | null;
          if (hit.matchedOn === 'description') {
            snippet = snippetAround(sourceDescription, terms);
          } else {
            // Otherwise preview the source through its first three members.
            const memberNames = [
              ...source.measures.map((measure) => measure.name),
              ...source.columns.map((column) => column.name),
            ];
            snippet = cap200(`${source.name}: ${memberNames.slice(0, 3).join(', ')}`);
          }
          found.push({
            rankScore: hit.score,
            ref: {
              kind: 'sl_source',
              id: source.name,
              connectionId,
              summary: sourceDescription,
              snippet,
              matchedOn: hit.matchedOn,
            },
          });
        }
      }

      if (wantMeasures) {
        for (const measure of source.measures) {
          const hit = bestField(
            [
              { matchedOn: 'name', text: measure.name, weight: 1.2 },
              { matchedOn: 'description', text: measure.description, weight: 1 },
              { matchedOn: 'expr', text: measure.expr, weight: 0.9 },
            ],
            terms,
          );
          if (!hit) {
            continue;
          }
          found.push({
            rankScore: hit.score,
            ref: {
              kind: 'sl_measure',
              id: `${source.name}.${measure.name}`,
              connectionId,
              summary: measure.description ?? null,
              snippet: cap200(measure.expr),
              matchedOn: hit.matchedOn,
            },
          });
        }
      }

      if (wantDimensions) {
        for (const column of source.columns) {
          const columnDescription = resolveDescription(column.descriptions, { priority: DEFAULT_PRIORITY });
          const hit = bestField(
            [
              { matchedOn: 'name', text: column.name, weight: 1.2 },
              { matchedOn: 'display', text: `${source.name}.${column.name}`, weight: 1.1 },
              { matchedOn: 'description', text: columnDescription, weight: 1 },
              { matchedOn: 'expr', text: column.expr, weight: 0.9 },
            ],
            terms,
          );
          if (!hit) {
            continue;
          }
          found.push({
            rankScore: hit.score,
            ref: {
              kind: 'sl_dimension',
              id: `${source.name}.${column.name}`,
              connectionId,
              summary: columnDescription,
              snippet: cap200(`${column.name} (${column.type})`),
              matchedOn: hit.matchedOn,
            },
          });
        }
      }
    }
  }

  found.sort((a, b) => b.rankScore - a.rankScore || a.ref.id.localeCompare(b.ref.id));
  return found;
}
|
||||
|
||||
/**
 * Builds raw-schema candidates (tables and columns) from the latest scan of
 * each connection in scope. Connections without a scan are skipped.
 *
 * Column matching considers only the first five sample values, each converted
 * with `String(...)`.
 *
 * @returns Candidates sorted by rank score descending, then by id.
 */
async function rawCandidates(
  project: KtxLocalProject,
  input: KtxDiscoverDataInput,
  kinds: ReadonlySet<KtxDiscoverDataKind>,
  terms: readonly string[],
): Promise<CandidateRecord[]> {
  const wantTables = hasKind(kinds, 'table');
  const wantColumns = hasKind(kinds, 'column');
  const found: CandidateRecord[] = [];

  for (const connectionId of configuredConnectionIds(project, input.connectionId)) {
    const scan = await latestScan(project, connectionId);
    if (!scan) {
      continue;
    }

    for (const table of scan.tables) {
      const ref = tableRef(table);
      const display = displayForTable(ref);
      // Prefer a resolved description and fall back to the database comment.
      const tableDescription = resolveDescription(table.descriptions, { priority: DEFAULT_PRIORITY }) ?? table.comment;

      if (wantTables) {
        const hit = bestField(
          [
            { matchedOn: 'name', text: table.name, weight: 1.2 },
            { matchedOn: 'display', text: display, weight: 1.1 },
            { matchedOn: 'description', text: tableDescription, weight: 1 },
            { matchedOn: 'comment', text: table.comment, weight: 1 },
          ],
          terms,
        );
        if (hit) {
          const matchedProse = hit.matchedOn === 'description' || hit.matchedOn === 'comment';
          let snippet: string | null;
          if (matchedProse) {
            snippet = snippetAround(hit.text, terms);
          } else {
            // Name-style matches are previewed through the first five column names.
            const leadingColumns = table.columns.slice(0, 5).map((column) => column.name);
            snippet = cap200(leadingColumns.join(', '));
          }
          found.push({
            rankScore: hit.score,
            ref: {
              kind: 'table',
              id: display,
              connectionId,
              tableRef: ref,
              summary: tableDescription,
              snippet,
              matchedOn: hit.matchedOn,
            },
          });
        }
      }

      if (!wantColumns) {
        continue;
      }
      for (const column of table.columns) {
        const columnDescription =
          resolveDescription(column.descriptions, { priority: DEFAULT_PRIORITY }) ?? column.comment;
        const samples = (column.sampleValues ?? []).map((value) => String(value)).slice(0, 5);
        const hit = bestField(
          [
            { matchedOn: 'name', text: column.name, weight: 1.2 },
            { matchedOn: 'display', text: `${display}.${column.name}`, weight: 1.1 },
            { matchedOn: 'description', text: columnDescription, weight: 1 },
            { matchedOn: 'comment', text: column.comment, weight: 1 },
            { matchedOn: 'sample_value', text: samples.join(' '), weight: 1.3 },
          ],
          terms,
        );
        if (!hit) {
          continue;
        }

        let snippet: string | null;
        if (hit.matchedOn === 'sample_value') {
          snippet = cap200(`${column.nativeType} - samples: ${samples.join(', ')}`);
        } else if (hit.matchedOn === 'description' || hit.matchedOn === 'comment') {
          snippet = snippetAround(hit.text, terms);
        } else {
          snippet = cap200(column.nativeType);
        }
        found.push({
          rankScore: hit.score,
          ref: {
            kind: 'column',
            id: `${display}.${column.name}`,
            connectionId,
            tableRef: ref,
            columnName: column.name,
            summary: columnDescription,
            snippet,
            matchedOn: hit.matchedOn,
          },
        });
      }
    }
  }

  found.sort((a, b) => b.rankScore - a.rankScore || a.ref.id.localeCompare(b.ref.id));
  return found;
}
|
||||
|
||||
/**
 * Wraps pre-computed candidates as a search lane.
 *
 * As a side effect, every candidate's ref is registered in `refsByKey` under
 * `<kind>:<connectionId or ''>:<id>`, the same id the lane later emits.
 *
 * @param name Lane name.
 * @param candidates Candidates in rank order; rank is the 1-based position.
 * @param refsByKey Shared lookup that this call adds to.
 */
function generator(
  name: string,
  candidates: CandidateRecord[],
  refsByKey: Map<string, Omit<KtxDiscoverDataRef, 'score'>>,
): SearchCandidateGenerator {
  const keyOf = (ref: Omit<KtxDiscoverDataRef, 'score'>): string =>
    `${ref.kind}:${ref.connectionId ?? ''}:${ref.id}`;

  for (const { ref } of candidates) {
    refsByKey.set(keyOf(ref), ref);
  }

  return {
    lane: name,
    weight: 1,
    async generate() {
      const ranked = candidates.map(({ ref, rankScore }, position) => ({
        id: keyOf(ref),
        rank: position + 1,
        rawScore: rankScore,
      }));
      return { candidates: ranked };
    },
  };
}
|
||||
|
||||
/**
 * Turns fused candidates back into result refs, in fused order.
 *
 * Each score is the fused score divided by the largest fused score, rounded to
 * six decimals; all scores are 0 when that largest score is not positive.
 * Candidates whose id is missing from `refsByKey` are dropped.
 */
function hydrate(
  fused: FusedSearchCandidate[],
  refsByKey: Map<string, Omit<KtxDiscoverDataRef, 'score'>>,
): KtxDiscoverDataRef[] {
  const topScore = fused.reduce((highest, candidate) => Math.max(highest, candidate.score), 0);
  const hydrated: KtxDiscoverDataRef[] = [];
  for (const candidate of fused) {
    const ref = refsByKey.get(candidate.id);
    if (!ref) {
      continue;
    }
    const relative = topScore > 0 ? Number((candidate.score / topScore).toFixed(6)) : 0;
    hydrated.push({ ...ref, score: relative });
  }
  return hydrated;
}
|
||||
|
||||
/**
 * Creates the discovery service for a local project.
 *
 * `search` gathers candidates from up to three lanes (wiki, semantic layer,
 * raw schema) depending on the requested kinds, fuses them with
 * `HybridSearchCore` at equal lane weights, and returns refs with relative scores.
 *
 * @param project Project whose wiki, semantic layer and scans are searched.
 * @param options Optional user id and embedding service for the wiki lane.
 * @returns An object exposing `search`. The limit defaults to 15 and is
 *   clamped to 1..50; a blank query or an empty kind selection yields `[]`.
 */
export function createKtxDiscoverDataService(
  project: KtxLocalProject,
  options: KtxDiscoverDataServiceOptions = {},
): { search(input: KtxDiscoverDataInput): Promise<KtxDiscoverDataResponse> } {
  async function search(input: KtxDiscoverDataInput): Promise<KtxDiscoverDataResponse> {
    const limit = Math.max(1, Math.min(input.limit ?? 15, 50));
    const query = input.query.trim();
    if (query.length === 0) {
      return [];
    }

    const kinds = new Set(input.kinds ?? ALL_KINDS);
    const terms = queryTerms(query);
    const refsByKey = new Map<string, Omit<KtxDiscoverDataRef, 'score'>>();
    const lanes: SearchCandidateGenerator[] = [];
    const scoped: KtxDiscoverDataInput = { ...input, limit };

    // Lanes are gathered one after another, in a fixed order.
    if (hasKind(kinds, 'wiki')) {
      const wiki = await wikiCandidates(project, scoped, options, terms);
      lanes.push(generator('wiki', wiki, refsByKey));
    }
    const wantsSemanticLayer =
      hasKind(kinds, 'sl_source') || hasKind(kinds, 'sl_measure') || hasKind(kinds, 'sl_dimension');
    if (wantsSemanticLayer) {
      const semantic = await slCandidates(project, scoped, kinds, terms);
      lanes.push(generator('semantic_layer', semantic, refsByKey));
    }
    if (hasKind(kinds, 'table') || hasKind(kinds, 'column')) {
      const raw = await rawCandidates(project, scoped, kinds, terms);
      lanes.push(generator('raw_schema', raw, refsByKey));
    }
    if (lanes.length === 0) {
      return [];
    }

    const core = new HybridSearchCore();
    const fused = await core.search({
      queryText: query,
      limit,
      generators: lanes,
      laneWeights: { wiki: 1, semantic_layer: 1, raw_schema: 1 },
    });
    return hydrate(fused.results, refsByKey);
  }

  return { search };
}
|
||||
|
|
@ -10,6 +10,15 @@ export {
|
|||
assertSearchBackendCapabilities,
|
||||
assertSearchBackendConformanceCase,
|
||||
} from './backend-conformance.js';
|
||||
export { createKtxDiscoverDataService } from './discover.js';
|
||||
export type {
|
||||
KtxDiscoverDataInput,
|
||||
KtxDiscoverDataKind,
|
||||
KtxDiscoverDataMatchedOn,
|
||||
KtxDiscoverDataRef,
|
||||
KtxDiscoverDataResponse,
|
||||
KtxDiscoverDataServiceOptions,
|
||||
} from './discover.js';
|
||||
export { HybridSearchCore } from './hybrid-search-core.js';
|
||||
export { defaultLaneCandidatePoolLimit, normalizeSearchQuery } from './query.js';
|
||||
export {
|
||||
|
|
|
|||
228
packages/context/src/sl/dictionary-search.test.ts
Normal file
228
packages/context/src/sl/dictionary-search.test.ts
Normal file
|
|
@ -0,0 +1,228 @@
|
|||
import { mkdtemp, rm } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
||||
import { initKtxProject, type KtxLocalProject } from '../project/index.js';
|
||||
import { createKtxDictionarySearchService } from './dictionary-search.js';
|
||||
|
||||
describe('createKtxDictionarySearchService', () => {
|
||||
let tempDir: string;
|
||||
let project: KtxLocalProject;
|
||||
|
||||
beforeEach(async () => {
|
||||
tempDir = await mkdtemp(join(tmpdir(), 'ktx-dictionary-search-'));
|
||||
project = await initKtxProject({ projectDir: join(tempDir, 'project') });
|
||||
project.config.connections.warehouse = { driver: 'postgres', url: 'env:DATABASE_URL' };
|
||||
project.config.connections.billing = { driver: 'postgres', url: 'env:BILLING_DATABASE_URL' };
|
||||
});
|
||||
|
||||
afterEach(async () => {
|
||||
await rm(tempDir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
/**
 * Writes a relationship-profile artifact for one connection/sync pair into
 * the test project's file store, using the given column map as the profile's
 * `columns` section.
 */
async function seedProfile(input: {
  connectionId: string;
  syncId: string;
  columns: Record<string, unknown>;
}): Promise<void> {
  const { connectionId, syncId, columns } = input;
  const artifactPath = `raw-sources/${connectionId}/live-database/${syncId}/enrichment/relationship-profile.json`;
  const artifact = {
    connectionId,
    driver: 'postgres',
    sqlAvailable: true,
    queryCount: 4,
    tables: [],
    columns,
    warnings: [],
  };
  // Pretty-printed JSON followed by a trailing newline.
  const serialized = `${JSON.stringify(artifact, null, 2)}\n`;

  await project.fileStore.writeFile(
    artifactPath,
    serialized,
    'ktx',
    'ktx@example.com',
    'Seed relationship profile',
  );
}
|
||||
|
||||
it('returns matches and non-authoritative misses across configured connections', async () => {
|
||||
await seedProfile({
|
||||
connectionId: 'warehouse',
|
||||
syncId: 'sync-1',
|
||||
columns: {
|
||||
'orders.status': {
|
||||
table: { catalog: null, db: 'public', name: 'orders' },
|
||||
column: 'status',
|
||||
nativeType: 'text',
|
||||
normalizedType: 'string',
|
||||
distinctCount: 3,
|
||||
sampleValues: ['paid', 'refunded', 'pending'],
|
||||
},
|
||||
},
|
||||
});
|
||||
await seedProfile({
|
||||
connectionId: 'billing',
|
||||
syncId: 'sync-2',
|
||||
columns: {
|
||||
'customers.name': {
|
||||
table: { catalog: null, db: 'public', name: 'customers' },
|
||||
column: 'name',
|
||||
nativeType: 'text',
|
||||
normalizedType: 'string',
|
||||
distinctCount: 4,
|
||||
sampleValues: ['Acme Corp', 'Globex'],
|
||||
},
|
||||
},
|
||||
});
|
||||
const service = createKtxDictionarySearchService(project);
|
||||
|
||||
await expect(service.search({ values: ['PAID', 'missing'] })).resolves.toEqual({
|
||||
searched: [
|
||||
{
|
||||
connectionId: 'billing',
|
||||
coverage: {
|
||||
sampledRows: null,
|
||||
valuesPerColumn: null,
|
||||
profiledColumns: 1,
|
||||
syncId: 'sync-2',
|
||||
profiledAt: null,
|
||||
},
|
||||
status: 'ready',
|
||||
},
|
||||
{
|
||||
connectionId: 'warehouse',
|
||||
coverage: {
|
||||
sampledRows: null,
|
||||
valuesPerColumn: null,
|
||||
profiledColumns: 1,
|
||||
syncId: 'sync-1',
|
||||
profiledAt: null,
|
||||
},
|
||||
status: 'ready',
|
||||
},
|
||||
],
|
||||
results: [
|
||||
{
|
||||
value: 'PAID',
|
||||
matches: [
|
||||
{
|
||||
connectionId: 'warehouse',
|
||||
sourceName: 'orders',
|
||||
columnName: 'status',
|
||||
matchedValue: 'paid',
|
||||
cardinality: 3,
|
||||
},
|
||||
],
|
||||
misses: [{ connectionId: 'billing', reason: 'value_not_in_sample' }],
|
||||
},
|
||||
{
|
||||
value: 'missing',
|
||||
matches: [],
|
||||
misses: [
|
||||
{ connectionId: 'billing', reason: 'value_not_in_sample' },
|
||||
{ connectionId: 'warehouse', reason: 'value_not_in_sample' },
|
||||
],
|
||||
},
|
||||
],
|
||||
});
|
||||
});
|
||||
|
||||
it('distinguishes missing profile artifacts from profiles with no candidate columns', async () => {
|
||||
await seedProfile({
|
||||
connectionId: 'billing',
|
||||
syncId: 'sync-empty',
|
||||
columns: {
|
||||
'events.id': {
|
||||
table: { catalog: null, db: 'public', name: 'events' },
|
||||
column: 'id',
|
||||
nativeType: 'integer',
|
||||
normalizedType: 'integer',
|
||||
distinctCount: 100,
|
||||
sampleValues: [1, 2, 3],
|
||||
},
|
||||
},
|
||||
});
|
||||
const service = createKtxDictionarySearchService(project);
|
||||
|
||||
await expect(service.search({ values: ['Acme'] })).resolves.toEqual({
|
||||
searched: [
|
||||
{
|
||||
connectionId: 'billing',
|
||||
coverage: {
|
||||
sampledRows: null,
|
||||
valuesPerColumn: null,
|
||||
profiledColumns: 0,
|
||||
syncId: 'sync-empty',
|
||||
profiledAt: null,
|
||||
},
|
||||
status: 'no_candidate_columns',
|
||||
},
|
||||
{
|
||||
connectionId: 'warehouse',
|
||||
coverage: {
|
||||
sampledRows: null,
|
||||
valuesPerColumn: null,
|
||||
profiledColumns: 0,
|
||||
syncId: null,
|
||||
profiledAt: null,
|
||||
},
|
||||
status: 'no_profile_artifact',
|
||||
},
|
||||
],
|
||||
results: [
|
||||
{
|
||||
value: 'Acme',
|
||||
matches: [],
|
||||
misses: [
|
||||
{ connectionId: 'billing', reason: 'no_candidate_columns' },
|
||||
{ connectionId: 'warehouse', reason: 'no_profile_artifact' },
|
||||
],
|
||||
},
|
||||
],
|
||||
});
|
||||
});
|
||||
|
||||
it('scopes search to the requested connection', async () => {
|
||||
await seedProfile({
|
||||
connectionId: 'warehouse',
|
||||
syncId: 'sync-1',
|
||||
columns: {
|
||||
'orders.status': {
|
||||
table: { catalog: null, db: 'public', name: 'orders' },
|
||||
column: 'status',
|
||||
nativeType: 'text',
|
||||
normalizedType: 'string',
|
||||
distinctCount: 3,
|
||||
sampleValues: ['paid'],
|
||||
},
|
||||
},
|
||||
});
|
||||
await seedProfile({
|
||||
connectionId: 'billing',
|
||||
syncId: 'sync-2',
|
||||
columns: {
|
||||
'invoices.status': {
|
||||
table: { catalog: null, db: 'public', name: 'invoices' },
|
||||
column: 'status',
|
||||
nativeType: 'text',
|
||||
normalizedType: 'string',
|
||||
distinctCount: 2,
|
||||
sampleValues: ['paid'],
|
||||
},
|
||||
},
|
||||
});
|
||||
const service = createKtxDictionarySearchService(project);
|
||||
|
||||
await expect(service.search({ connectionId: 'billing', values: ['paid'] })).resolves.toMatchObject({
|
||||
searched: [{ connectionId: 'billing', status: 'ready' }],
|
||||
results: [
|
||||
{
|
||||
value: 'paid',
|
||||
matches: [{ connectionId: 'billing', sourceName: 'invoices', columnName: 'status', matchedValue: 'paid' }],
|
||||
misses: [],
|
||||
},
|
||||
],
|
||||
});
|
||||
});
|
||||
});
|
||||
214
packages/context/src/sl/dictionary-search.ts
Normal file
214
packages/context/src/sl/dictionary-search.ts
Normal file
|
|
@ -0,0 +1,214 @@
|
|||
import type { KtxLocalProject } from '../project/index.js';
|
||||
import { loadLatestSlDictionaryEntries, type SlDictionaryEntry } from './sl-dictionary-profile.js';
|
||||
|
||||
/**
 * Per-connection readiness of the dictionary profile:
 * - `ready`: a profile artifact exists and yielded at least one profiled column;
 * - `no_profile_artifact`: no relationship-profile artifact was found;
 * - `no_candidate_columns`: an artifact exists but produced no dictionary entries.
 */
export type KtxDictionarySearchStatus =
  | 'ready'
  | 'no_profile_artifact'
  | 'no_candidate_columns';

/**
 * Why a searched connection produced no match for a value. Non-ready statuses
 * are reported as-is; a ready connection without a hit reports
 * `value_not_in_sample`.
 */
export type KtxDictionarySearchMissReason =
  | 'no_profile_artifact'
  | 'no_candidate_columns'
  | 'value_not_in_sample';
|
||||
|
||||
/** Request accepted by the dictionary search service. */
export interface KtxDictionarySearchInput {
  /** Values to look up; each is matched case-insensitively as a substring of sampled column values. */
  values: string[];
  /** When set, only this connection is searched; otherwise every configured connection is. */
  connectionId?: string;
}
|
||||
|
||||
/** Describes how much of a connection the latest profile artifact covers. */
export interface KtxDictionarySearchCoverage {
  /** The artifact's `profileSampleRows` when it is a finite number, else `null`. */
  sampledRows: number | null;
  /** The artifact's `sampleValuesPerColumn` when it is a finite number, else `null`. */
  valuesPerColumn: number | null;
  /** Number of distinct source/column pairs among the connection's dictionary entries. */
  profiledColumns: number;
  /** Sync identifier taken from the artifact path, or `null` when no artifact exists. */
  syncId: string | null;
  /** The artifact's `profiledAt`, falling back to `extractedAt`; `null` when neither is a non-blank string. */
  profiledAt: string | null;
}
|
||||
|
||||
/** Summary of one connection that took part in a dictionary search. */
export interface KtxDictionarySearchSearchedConnection {
  /** Identifier of the searched connection. */
  connectionId: string;
  /** Coverage details derived from the connection's latest profile artifact. */
  coverage: KtxDictionarySearchCoverage;
  /** Whether the connection had usable dictionary data. */
  status: KtxDictionarySearchStatus;
}
|
||||
|
||||
/** A sampled column value that matched a searched value. */
export interface KtxDictionarySearchMatch {
  /** Connection the matching dictionary entry belongs to. */
  connectionId: string;
  /** Source name of the matching dictionary entry. */
  sourceName: string;
  /** Column name of the matching dictionary entry. */
  columnName: string;
  /** The sampled value as stored in the dictionary entry (not the query text). */
  matchedValue: string;
  /** Cardinality carried over from the dictionary entry; `null` when the entry has none. */
  cardinality: number | null;
}
|
||||
|
||||
/** Records that a searched connection yielded no match for a value, and why. */
export interface KtxDictionarySearchMiss {
  /** Connection that produced no match. */
  connectionId: string;
  /** Reason derived from the connection's search status. */
  reason: KtxDictionarySearchMissReason;
}
|
||||
|
||||
/** Outcome for a single searched value. */
export interface KtxDictionarySearchValueResult {
  /** The searched value, echoed back exactly as supplied. */
  value: string;
  /** Matching entries, ordered by connection, source, column and matched value. */
  matches: KtxDictionarySearchMatch[];
  /** One entry per searched connection that contributed no match. */
  misses: KtxDictionarySearchMiss[];
}
|
||||
|
||||
/** Full response of a dictionary search. */
export interface KtxDictionarySearchResponse {
  /** Searched connections, ordered by connection id. */
  searched: KtxDictionarySearchSearchedConnection[];
  /** One result per input value, in input order. */
  results: KtxDictionarySearchValueResult[];
}
|
||||
|
||||
/**
 * The subset of a parsed relationship-profile artifact that this module reads.
 * Most fields are typed `unknown` because the JSON is not schema-validated;
 * consumers narrow each value before use.
 */
interface RelationshipProfileArtifact {
  connectionId?: string;
  /** Reported as coverage `sampledRows` when it is a finite number. */
  profileSampleRows?: unknown;
  /** Reported as coverage `valuesPerColumn` when it is a finite number. */
  sampleValuesPerColumn?: unknown;
  /** Preferred source for coverage `profiledAt`. */
  profiledAt?: unknown;
  /** Fallback source for coverage `profiledAt`. */
  extractedAt?: unknown;
}
|
||||
|
||||
/**
 * Returns the distinct, non-blank strings from `values`, ordered with
 * `localeCompare`. Strings are kept as supplied (no trimming of the output).
 */
function uniqueSorted(values: Iterable<string>): string[] {
  const distinct = new Set<string>();
  for (const candidate of values) {
    // Whitespace-only strings count as blank and are dropped.
    if (candidate.trim().length > 0) {
      distinct.add(candidate);
    }
  }
  return Array.from(distinct).sort((a, b) => a.localeCompare(b));
}
|
||||
|
||||
/**
 * Extracts the sync identifier from a profile artifact path: the third
 * segment from the end when the path is split on `/`. Returns `null` when the
 * path has fewer than three segments.
 */
function latestProfileSyncId(path: string): string | null {
  const segments = path.split('/');
  const syncSegment: string | undefined = segments[segments.length - 3];
  return syncSegment ?? null;
}
|
||||
|
||||
/** Returns `value` when it is a finite number; otherwise `null`. */
function optionalNumber(value: unknown): number | null {
  if (typeof value !== 'number') {
    return null;
  }
  // NaN and +/-Infinity are treated as absent.
  return Number.isFinite(value) ? value : null;
}
|
||||
|
||||
/**
 * Returns `value` unchanged when it is a string containing at least one
 * non-whitespace character; otherwise `null`.
 */
function optionalString(value: unknown): string | null {
  if (typeof value !== 'string') {
    return null;
  }
  const isBlank = value.trim().length === 0;
  return isBlank ? null : value;
}
|
||||
|
||||
/**
 * Finds the relationship-profile artifact to use for a connection.
 *
 * Lists the files under `raw-sources/<connectionId>/live-database`, keeps the
 * paths ending in `/enrichment/relationship-profile.json`, and returns the one
 * that sorts last under `localeCompare`. Returns `null` when listing fails or
 * no such artifact exists.
 */
async function latestProfilePath(project: KtxLocalProject, connectionId: string): Promise<string | null> {
  const profileSuffix = '/enrichment/relationship-profile.json';

  let listed: string[];
  try {
    const listing = await project.fileStore.listFiles(`raw-sources/${connectionId}/live-database`);
    listed = listing.files;
  } catch {
    // A failed listing is reported the same way as "no artifact".
    return null;
  }

  const profilePaths = listed.filter((candidate) => candidate.endsWith(profileSuffix));
  profilePaths.sort((a, b) => a.localeCompare(b));
  // NOTE(review): "latest" relies on sync ids sorting chronologically as text — confirm.
  const newest: string | undefined = profilePaths[profilePaths.length - 1];
  return newest ?? null;
}
|
||||
|
||||
/**
 * Reads and JSON-parses the profile artifact at `path`.
 *
 * Returns the parsed value when it is a plain (non-array, non-null) object,
 * and an empty artifact otherwise. Read failures and invalid JSON propagate
 * to the caller as rejections.
 */
async function readProfile(project: KtxLocalProject, path: string): Promise<RelationshipProfileArtifact> {
  const { content } = await project.fileStore.readFile(path);
  const decoded: unknown = JSON.parse(content);

  if (decoded === null || typeof decoded !== 'object' || Array.isArray(decoded)) {
    return {};
  }
  return decoded as RelationshipProfileArtifact;
}
|
||||
|
||||
/** Counts the distinct source/column pairs present in `entries`. */
function profiledColumnCount(entries: readonly SlDictionaryEntry[]): number {
  const seenColumns = new Set<string>();
  for (const { sourceName, columnName } of entries) {
    // The unit-separator character keeps "a" + "bc" distinct from "ab" + "c".
    seenColumns.add(`${sourceName}\u001f${columnName}`);
  }
  return seenColumns.size;
}
|
||||
|
||||
/**
 * Builds the coverage/status summary for one connection.
 *
 * Without a profile artifact the connection is reported as
 * `no_profile_artifact` with empty coverage. Otherwise coverage is read from
 * the artifact, and the status is `ready` when `entries` span at least one
 * column, else `no_candidate_columns`.
 */
async function searchedConnection(
  project: KtxLocalProject,
  connectionId: string,
  entries: readonly SlDictionaryEntry[],
): Promise<KtxDictionarySearchSearchedConnection> {
  const profilePath = await latestProfilePath(project, connectionId);
  if (!profilePath) {
    const emptyCoverage: KtxDictionarySearchCoverage = {
      sampledRows: null,
      valuesPerColumn: null,
      profiledColumns: 0,
      syncId: null,
      profiledAt: null,
    };
    return { connectionId, coverage: emptyCoverage, status: 'no_profile_artifact' };
  }

  const artifact = await readProfile(project, profilePath);
  const profiledColumns = profiledColumnCount(entries);
  const status: KtxDictionarySearchStatus = profiledColumns > 0 ? 'ready' : 'no_candidate_columns';

  const coverage: KtxDictionarySearchCoverage = {
    sampledRows: optionalNumber(artifact.profileSampleRows),
    valuesPerColumn: optionalNumber(artifact.sampleValuesPerColumn),
    profiledColumns,
    syncId: latestProfileSyncId(profilePath),
    // Prefer the explicit profiling timestamp; fall back to the extraction one.
    profiledAt: optionalString(artifact.profiledAt) ?? optionalString(artifact.extractedAt),
  };
  return { connectionId, coverage, status };
}
|
||||
|
||||
/**
 * Tests whether a dictionary entry's sampled value contains `value`,
 * ignoring case.
 *
 * A blank (empty or whitespace-only) `value` never matches: the empty string
 * is a substring of every string, so without this guard a blank query would
 * report every sampled value as a match.
 *
 * @param entry Dictionary entry whose `value` is inspected.
 * @param value Needle to look for; compared as supplied (not trimmed).
 * @returns `true` when `entry.value` contains `value` case-insensitively.
 */
function entryMatchesValue(entry: SlDictionaryEntry, value: string): boolean {
  if (value.trim().length === 0) {
    return false;
  }
  return entry.value.toLowerCase().includes(value.toLowerCase());
}
|
||||
|
||||
/**
 * Projects a dictionary entry onto the public match shape; the entry's
 * `value` is exposed as `matchedValue`.
 */
function toMatch(entry: SlDictionaryEntry): KtxDictionarySearchMatch {
  const { connectionId, sourceName, columnName, value, cardinality } = entry;
  return { connectionId, sourceName, columnName, matchedValue: value, cardinality };
}
|
||||
|
||||
/**
 * Returns the matches ordered by connection id, then source name, then column
 * name, then matched value (each compared with `localeCompare`).
 *
 * The input array is left untouched: `Array.prototype.sort` sorts in place,
 * so a copy is sorted instead to avoid surprising callers that keep a
 * reference to the original array.
 *
 * @param matches Matches to order.
 * @returns A new, sorted array containing the same match objects.
 */
function sortMatches(matches: KtxDictionarySearchMatch[]): KtxDictionarySearchMatch[] {
  return [...matches].sort(
    (left, right) =>
      left.connectionId.localeCompare(right.connectionId) ||
      left.sourceName.localeCompare(right.sourceName) ||
      left.columnName.localeCompare(right.columnName) ||
      left.matchedValue.localeCompare(right.matchedValue),
  );
}
|
||||
|
||||
/**
 * Maps a connection's search status to the reason reported for a miss: a
 * ready connection missed because the value is not in its sample; any other
 * status is itself the reason.
 */
function missReason(status: KtxDictionarySearchStatus): KtxDictionarySearchMissReason {
  if (status === 'ready') {
    return 'value_not_in_sample';
  }
  return status;
}
|
||||
|
||||
/**
 * Creates the dictionary search service for a local project.
 *
 * `search` looks each input value up in the latest dictionary entries of the
 * requested connection (or of every configured connection when none is
 * given). The response lists the searched connections with their coverage
 * and, per value, the sorted matches plus one miss for every searched
 * connection that contributed no match.
 */
export function createKtxDictionarySearchService(project: KtxLocalProject): {
  search(input: KtxDictionarySearchInput): Promise<KtxDictionarySearchResponse>;
} {
  const search = async (input: KtxDictionarySearchInput): Promise<KtxDictionarySearchResponse> => {
    let connectionIds: string[];
    if (input.connectionId) {
      connectionIds = [input.connectionId];
    } else {
      connectionIds = uniqueSorted(Object.keys(project.config.connections));
    }

    const entries = await loadLatestSlDictionaryEntries(project, connectionIds);

    // Bucket entries per requested connection, preserving their load order.
    const entriesByConnection = new Map<string, SlDictionaryEntry[]>();
    for (const connectionId of connectionIds) {
      entriesByConnection.set(connectionId, []);
    }
    for (const entry of entries) {
      entriesByConnection.get(entry.connectionId)?.push(entry);
    }

    const summaries = await Promise.all(
      connectionIds.map((connectionId) =>
        searchedConnection(project, connectionId, entriesByConnection.get(connectionId) ?? []),
      ),
    );
    const searched = summaries.sort((a, b) => a.connectionId.localeCompare(b.connectionId));

    const results = input.values.map((value): KtxDictionarySearchValueResult => {
      const hits = entries.filter((entry) => entryMatchesValue(entry, value)).map(toMatch);
      const matches = sortMatches(hits);
      const connectionsWithHits = new Set(matches.map((match) => match.connectionId));

      const misses: KtxDictionarySearchMiss[] = [];
      for (const connection of searched) {
        if (connectionsWithHits.has(connection.connectionId)) {
          continue;
        }
        misses.push({
          connectionId: connection.connectionId,
          reason: missReason(connection.status),
        });
      }
      return { value, matches, misses };
    });

    return { searched, results };
  };

  return { search };
}
|
||||
|
|
@ -25,6 +25,18 @@ export {
|
|||
} from './semantic-layer.service.js';
|
||||
export { loadLatestSlDictionaryEntries } from './sl-dictionary-profile.js';
|
||||
export type { SlDictionaryEntry } from './sl-dictionary-profile.js';
|
||||
export { createKtxDictionarySearchService } from './dictionary-search.js';
|
||||
export type {
|
||||
KtxDictionarySearchCoverage,
|
||||
KtxDictionarySearchInput,
|
||||
KtxDictionarySearchMatch,
|
||||
KtxDictionarySearchMiss,
|
||||
KtxDictionarySearchMissReason,
|
||||
KtxDictionarySearchResponse,
|
||||
KtxDictionarySearchSearchedConnection,
|
||||
KtxDictionarySearchStatus,
|
||||
KtxDictionarySearchValueResult,
|
||||
} from './dictionary-search.js';
|
||||
export { buildSemanticLayerSourceSearchText, SlSearchService } from './sl-search.service.js';
|
||||
export { SqliteSlSourcesIndex, type SqliteSlSourcesIndexOptions } from './sqlite-sl-sources-index.js';
|
||||
export * from './local-sl.js';
|
||||
|
|
|
|||
|
|
@ -108,6 +108,44 @@ describe('createHttpSqlAnalysisPort', () => {
|
|||
});
|
||||
});
|
||||
|
||||
it('maps read-only SQL validation responses', async () => {
|
||||
const requests: Array<{ path: string; payload: Record<string, unknown> }> = [];
|
||||
const port = createHttpSqlAnalysisPort({
|
||||
baseUrl: 'http://127.0.0.1:8765',
|
||||
requestJson: async (path, payload) => {
|
||||
requests.push({ path, payload });
|
||||
return { ok: false, error: 'SQL contains read/write operation: Insert' };
|
||||
},
|
||||
});
|
||||
|
||||
await expect(
|
||||
port.validateReadOnly('with x as (insert into t values (1)) select * from x', 'postgres'),
|
||||
).resolves.toEqual({
|
||||
ok: false,
|
||||
error: 'SQL contains read/write operation: Insert',
|
||||
});
|
||||
expect(requests).toEqual([
|
||||
{
|
||||
path: '/sql/validate-read-only',
|
||||
payload: {
|
||||
dialect: 'postgres',
|
||||
sql: 'with x as (insert into t values (1)) select * from x',
|
||||
},
|
||||
},
|
||||
]);
|
||||
});
|
||||
|
||||
it('rejects malformed read-only validation responses', async () => {
|
||||
const port = createHttpSqlAnalysisPort({
|
||||
baseUrl: 'http://127.0.0.1:8765',
|
||||
requestJson: async () => ({ ok: 'yes' }),
|
||||
});
|
||||
|
||||
await expect(port.validateReadOnly('select 1', 'postgres')).rejects.toThrow(
|
||||
'sql analysis response is missing boolean field ok',
|
||||
);
|
||||
});
|
||||
|
||||
it('rejects malformed SQL batch responses instead of inventing defaults', async () => {
|
||||
const requestJson = vi.fn(async () => ({
|
||||
results: {
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ import type {
|
|||
SqlAnalysisLiteralSlot,
|
||||
SqlAnalysisLiteralSlotType,
|
||||
SqlAnalysisPort,
|
||||
SqlReadOnlyValidationResult,
|
||||
} from './ports.js';
|
||||
|
||||
export type KtxSqlAnalysisHttpJsonRunner = (
|
||||
|
|
@ -96,6 +97,14 @@ function requiredStringArray(raw: Record<string, unknown>, field: string): strin
|
|||
return value;
|
||||
}
|
||||
|
||||
/**
 * Reads `field` from a raw response object and returns it when it is a
 * boolean.
 *
 * @throws Error when the field is absent or not a boolean.
 */
function requiredBoolean(raw: Record<string, unknown>, field: string): boolean {
  const candidate = raw[field];
  if (typeof candidate === 'boolean') {
    return candidate;
  }
  throw new Error(`sql analysis response is missing boolean field ${field}`);
}
|
||||
|
||||
function requiredObject(raw: Record<string, unknown>, field: string): Record<string, unknown> {
|
||||
const value = raw[field];
|
||||
if (!value || typeof value !== 'object' || Array.isArray(value)) {
|
||||
|
|
@ -187,6 +196,14 @@ function mapBatchResponse(raw: Record<string, unknown>): Map<string, SqlAnalysis
|
|||
);
|
||||
}
|
||||
|
||||
/**
 * Converts a raw read-only validation response into a typed result. `ok` must
 * be a boolean; `error` is included only when the response provides one.
 */
function mapReadOnlyValidation(raw: Record<string, unknown>): SqlReadOnlyValidationResult {
  const error = optionalString(raw, 'error');
  const ok = requiredBoolean(raw, 'ok');

  const validation: SqlReadOnlyValidationResult = { ok };
  if (error !== undefined) {
    validation.error = error;
  }
  return validation;
}
|
||||
|
||||
export function createHttpSqlAnalysisPort(options: HttpSqlAnalysisPortOptions): SqlAnalysisPort {
|
||||
const requestJson = options.requestJson ?? postJson(options.baseUrl);
|
||||
|
||||
|
|
@ -205,5 +222,12 @@ export function createHttpSqlAnalysisPort(options: HttpSqlAnalysisPortOptions):
|
|||
});
|
||||
return mapBatchResponse(raw);
|
||||
},
|
||||
/**
 * Posts the statement and its dialect to `/sql/validate-read-only` and maps
 * the JSON reply into a read-only validation result.
 */
async validateReadOnly(sql: string, dialect: SqlAnalysisDialect) {
  const payload = { dialect, sql };
  const response = await requestJson('/sql/validate-read-only', payload);
  return mapReadOnlyValidation(response);
},
|
||||
};
|
||||
}
|
||||
|
|
|
|||
|
|
@ -9,4 +9,5 @@ export type {
|
|||
SqlAnalysisLiteralSlot,
|
||||
SqlAnalysisLiteralSlotType,
|
||||
SqlAnalysisPort,
|
||||
SqlReadOnlyValidationResult,
|
||||
} from './ports.js';
|
||||
|
|
|
|||
|
|
@ -38,10 +38,16 @@ export interface SqlAnalysisBatchResult {
|
|||
error?: string | null;
|
||||
}
|
||||
|
||||
/** Outcome of checking whether a SQL statement is read-only. */
export interface SqlReadOnlyValidationResult {
  /** Validation verdict reported by the analysis backend. */
  ok: boolean;
  /** Explanation supplied by the backend, typically alongside `ok: false`; may be absent or `null`. */
  error?: string | null;
}
|
||||
|
||||
/** Port through which SQL analysis is requested for a given dialect. */
export interface SqlAnalysisPort {
  /** Analyzes a single statement and resolves with its fingerprint analysis result. */
  analyzeForFingerprint(sql: string, dialect: SqlAnalysisDialect): Promise<SqlAnalysisFingerprintResult>;

  /**
   * Analyzes several statements in one call and resolves with a map of
   * per-item results.
   * NOTE(review): map keys presumably identify the batch items — confirm against the implementation.
   */
  analyzeBatch(
    items: SqlAnalysisBatchItem[],
    dialect: SqlAnalysisDialect,
  ): Promise<Map<string, SqlAnalysisBatchResult>>;

  /** Checks whether a statement is read-only for the given dialect. */
  validateReadOnly(sql: string, dialect: SqlAnalysisDialect): Promise<SqlReadOnlyValidationResult>;
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue