mirror of
https://github.com/Kaelio/ktx.git
synced 2026-07-01 08:59:39 +02:00
* docs(specs): design research-agent MCP tools and ktx mcp daemon Adds the 2026-05-14 design spec for exposing four new MCP tools (discover_data, entity_details, dictionary_search, sql_execution), shipping a ktx-research skill, and introducing an HTTP-only ktx mcp daemon so external agents can use KTX as a research-capable context layer. * Refine research-agent MCP tools spec after adversarial review iteration 1 * Refine research-agent MCP tools spec after adversarial review iteration 2 * Refine research-agent MCP tools spec after adversarial review iteration 3 * Refine spec: drop connectionName compat carve-out and ground summary/snippet provenance per kind * feat(daemon): validate read-only SQL with sqlglot * feat(context): expose read-only SQL validation port * feat(context): register MCP sql execution tool * feat(context): execute MCP SQL through validated connector path * test(context): update SQL analysis port fixtures * docs: add research-agent MCP sql execution foundation plan * feat(context): add scan-backed entity details service * feat(context): register MCP entity details tool * feat(context): expose local MCP entity details * test(context): align entity details scan fixtures * docs: add research-agent MCP entity_details plan * feat(context): add dictionary search service * feat(context): register MCP dictionary search tool * feat(context): expose local MCP dictionary search * docs: add research-agent MCP dictionary_search plan * feat: add MCP discover data service * feat: expose discover data MCP tool * feat: wire local discover data MCP port * docs: add research-agent MCP discover_data plan * feat(cli): add mcp http security helpers * feat(cli): host mcp over streamable http * feat(cli): manage mcp daemon lifecycle * feat(cli): add ktx mcp commands * fix(cli): stabilize mcp daemon verification * docs: add research-agent MCP http daemon plan * feat(cli): install KTX research skill * feat(cli): configure MCP clients in setup agents * feat(cli): support 
Claude local MCP setup scope * docs: add research-agent MCP setup-agents plan * refactor(context): use connectionId in warehouse verification tools * docs(context): update ingest verification prompts for connectionId * docs: add research-agent MCP ingest contract convergence plan * chore: build runtime artifacts in conductor setup --------- Co-authored-by: Andrey Avtomonov <7889985+andreybavt@users.noreply.github.com>
1629 lines
50 KiB
TypeScript
1629 lines
50 KiB
TypeScript
import { access, mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
|
|
import { tmpdir } from 'node:os';
|
|
import { join } from 'node:path';
|
|
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
|
import { AgentRunnerService } from '../agent/index.js';
|
|
import { FakeSourceAdapter, type MemoryFlowReplayInput } from '../ingest/index.js';
|
|
import { initKtxProject } from '../project/index.js';
|
|
import {
|
|
createKtxConnectorCapabilities,
|
|
type KtxQueryResult,
|
|
type KtxScanConnector,
|
|
type KtxSchemaSnapshot,
|
|
} from '../scan/index.js';
|
|
import { writeLocalSlSource } from '../sl/index.js';
|
|
import { createLocalProjectMcpContextPorts } from './local-project-ports.js';
|
|
|
|
/**
 * Agent runner test double.
 *
 * `runLoop` is replaced by a vitest mock that resolves with a natural stop
 * reason, so tests can assert on how often it was invoked. The base class is
 * constructed with a stub LLM provider whose `getModel` returns an empty object.
 */
class TestAgentRunner extends AgentRunnerService {
  override runLoop = vi.fn().mockResolvedValue({ stopReason: 'natural' as const });

  constructor() {
    super({
      llmProvider: {
        getModel: () => ({}) as never,
      } as never,
    });
  }
}
|
|
|
|
describe('createLocalProjectMcpContextPorts', () => {
|
|
// Scratch directory used as the project root; created before each test and
// removed (recursively, ignoring a missing path) after each test.
let tempDir: string;

beforeEach(async () => {
  const prefix = join(tmpdir(), 'ktx-mcp-local-');
  tempDir = await mkdtemp(prefix);
});

afterEach(async () => {
  const removal = { recursive: true, force: true };
  await rm(tempDir, removal);
});
|
|
|
|
/**
 * Builds a minimal schema snapshot fixture: one `public.orders` table with a
 * single non-nullable integer primary-key column `id`.
 *
 * @param connectionId Connection id stamped on the snapshot (defaults to `'warehouse'`).
 * @returns A postgres snapshot scoped to the `public` schema.
 */
function testSnapshot(connectionId = 'warehouse'): KtxSchemaSnapshot {
  const idColumn: KtxSchemaSnapshot['tables'][number]['columns'][number] = {
    name: 'id',
    nativeType: 'integer',
    normalizedType: 'integer',
    dimensionType: 'number',
    nullable: false,
    primaryKey: true,
    comment: null,
  };

  const ordersTable: KtxSchemaSnapshot['tables'][number] = {
    catalog: null,
    db: 'public',
    name: 'orders',
    kind: 'table',
    comment: null,
    estimatedRows: 1,
    foreignKeys: [],
    columns: [idColumn],
  };

  return {
    connectionId,
    driver: 'postgres',
    extractedAt: '2026-04-29T12:00:00.000Z',
    scope: { schemas: ['public'] },
    metadata: {},
    tables: [ordersTable],
  };
}
|
|
|
|
/**
 * Creates a scan connector test double backed by vitest mocks.
 *
 * @param snapshot Snapshot returned by the mocked `introspect`; also supplies the id suffix and driver.
 * @param queryResult When provided, the connector advertises read-only SQL and
 *   `executeReadOnly` resolves with this value; when omitted, `executeReadOnly` is `undefined`.
 * @returns The connector with mocked `introspect`, `executeReadOnly` (optional) and `cleanup`.
 */
function testConnector(snapshot = testSnapshot(), queryResult?: KtxQueryResult): KtxScanConnector {
  const supportsReadOnlySql = queryResult !== undefined;
  const capabilities = createKtxConnectorCapabilities({ readOnlySql: supportsReadOnlySql });

  const connector: KtxScanConnector = {
    id: `test:${snapshot.connectionId}`,
    driver: snapshot.driver,
    capabilities,
    introspect: vi.fn(async () => snapshot),
    executeReadOnly: queryResult !== undefined ? vi.fn(async () => queryResult) : undefined,
    cleanup: vi.fn(async () => {}),
  };
  return connector;
}
|
|
|
|
/**
 * Seeds a structural scan snapshot on disk under
 * `raw-sources/<connectionId>/live-database/<syncId>` inside `projectDir`.
 *
 * Three pretty-printed JSON files are written: `connection.json`,
 * `tables/orders.json` (a `public.orders` table with one integer `id` column)
 * and `scan-report.json` (a non-dry-run structural scan with every enrichment
 * stage marked `skipped`).
 *
 * @param projectDir Absolute project root to write into.
 * @param syncId Sync identifier used in the directory name and the report (default `'sync-1'`).
 * @param connectionId Connection id used in the directory name and payloads
 *   (default `'warehouse'`, matching the default of `testSnapshot`).
 */
async function seedScanReport(projectDir: string, syncId = 'sync-1', connectionId = 'warehouse'): Promise<void> {
  const root = `raw-sources/${connectionId}/live-database/${syncId}`;
  const timestamp = '2026-05-14T09:00:00.000Z';

  // Serialises `payload` with two-space indentation at <projectDir>/<root>/<segments...>.
  const writeJson = (payload: unknown, ...segments: string[]): Promise<void> =>
    writeFile(join(projectDir, root, ...segments), JSON.stringify(payload, null, 2), 'utf-8');

  // Creating the nested `tables` directory also creates the snapshot root.
  await mkdir(join(projectDir, root, 'tables'), { recursive: true });

  await writeJson(
    {
      connectionId,
      driver: 'postgres',
      extractedAt: timestamp,
      scope: { schemas: ['public'] },
    },
    'connection.json',
  );

  await writeJson(
    {
      catalog: null,
      db: 'public',
      name: 'orders',
      kind: 'table',
      comment: 'Customer orders',
      estimatedRows: 12,
      columns: [
        {
          name: 'id',
          nativeType: 'integer',
          normalizedType: 'integer',
          dimensionType: 'number',
          nullable: false,
          primaryKey: true,
          comment: null,
        },
      ],
      foreignKeys: [],
    },
    'tables',
    'orders.json',
  );

  await writeJson(
    {
      connectionId,
      driver: 'postgres',
      syncId,
      runId: 'scan-1',
      trigger: 'mcp',
      mode: 'structural',
      dryRun: false,
      artifactPaths: {
        rawSourcesDir: root,
        reportPath: `${root}/scan-report.json`,
        manifestShards: [],
        enrichmentArtifacts: [],
      },
      diffSummary: {
        tablesAdded: 0,
        tablesModified: 0,
        tablesDeleted: 0,
        tablesUnchanged: 1,
        columnsAdded: 0,
        columnsModified: 0,
        columnsDeleted: 0,
      },
      manifestShardsWritten: 0,
      structuralSyncStats: {
        tablesCreated: 1,
        tablesUpdated: 0,
        tablesDeleted: 0,
        columnsCreated: 0,
        columnsUpdated: 0,
        columnsDeleted: 0,
      },
      enrichment: {
        dataDictionary: 'skipped',
        tableDescriptions: 'skipped',
        columnDescriptions: 'skipped',
        embeddings: 'skipped',
        deterministicRelationships: 'skipped',
        llmRelationshipValidation: 'skipped',
        statisticalValidation: 'skipped',
      },
      capabilityGaps: [],
      warnings: [],
      relationships: { accepted: 0, review: 0, rejected: 0, skipped: 0 },
      enrichmentState: { resumedStages: [], completedStages: [], failedStages: [] },
      createdAt: timestamp,
    },
    'scan-report.json',
  );
}
|
|
|
|
// A postgres connection registered on the project config is listed with its
// id doubling as name and the connection type reported as POSTGRESQL.
it('lists local project connections from ktx.yaml', async () => {
  const project = await initKtxProject({ projectDir: tempDir });
  project.config.connections.warehouse = { driver: 'postgres', url: 'env:DATABASE_URL' };

  const { connections } = createLocalProjectMcpContextPorts(project);
  const expectedListing = [{ id: 'warehouse', name: 'warehouse', connectionType: 'POSTGRESQL' }];

  await expect(connections?.list()).resolves.toEqual(expectedListing);
});
|
|
|
|
// The connection test goes through the injected connector factory, performs a
// structural dry-run introspection, and cleans the connector up afterwards.
it('tests a local project connection through the native scan connector factory', async () => {
  const connectionId = 'warehouse';
  const project = await initKtxProject({ projectDir: tempDir });
  project.config.connections.warehouse = { driver: 'postgres', url: 'env:DATABASE_URL' };

  const connector = testConnector();
  const createConnector = vi.fn(async () => connector);
  const ports = createLocalProjectMcpContextPorts(project, { localScan: { createConnector } });

  const outcome = ports.connections?.test?.({ connectionId });
  await expect(outcome).resolves.toEqual({
    id: connectionId,
    connectionType: 'POSTGRESQL',
    ok: true,
    tableCount: 1,
    message: 'Connection test passed.',
    warnings: [],
  });

  const expectedRequest = {
    connectionId,
    driver: 'postgres',
    mode: 'structural',
    dryRun: true,
    detectRelationships: false,
  };
  const expectedContext = { runId: 'connection-test-warehouse' };

  expect(createConnector).toHaveBeenCalledWith(connectionId);
  expect(connector.introspect).toHaveBeenCalledWith(expectedRequest, expectedContext);
  expect(connector.cleanup).toHaveBeenCalled();
});
|
|
|
|
// Happy path: the validator accepts the statement, the connector runs it, and
// the port returns headers, header types, rows and rowCount.
it('executes MCP SQL only after parser-backed validation passes', async () => {
  const sql = 'select id from public.orders';
  const project = await initKtxProject({ projectDir: tempDir });
  project.config.connections.warehouse = { driver: 'postgres', url: 'env:DATABASE_URL' };

  const queryResult: KtxQueryResult = {
    headers: ['id'],
    headerTypes: ['integer'],
    rows: [[1]],
    totalRows: 1,
    rowCount: 1,
  };
  const connector = testConnector(testSnapshot(), queryResult);
  const createConnector = vi.fn(async () => connector);
  const sqlAnalysis = {
    analyzeForFingerprint: vi.fn(),
    analyzeBatch: vi.fn(),
    validateReadOnly: vi.fn(async () => ({ ok: true, error: null })),
  };
  const ports = createLocalProjectMcpContextPorts(project, {
    sqlAnalysis,
    localScan: { createConnector },
  });

  const execution = ports.sqlExecution?.execute({ connectionId: 'warehouse', sql, maxRows: 5 });
  await expect(execution).resolves.toEqual({
    headers: ['id'],
    headerTypes: ['integer'],
    rows: [[1]],
    rowCount: 1,
  });

  // The statement is validated against the connection's driver dialect.
  expect(sqlAnalysis.validateReadOnly).toHaveBeenCalledWith(sql, 'postgres');
  expect(createConnector).toHaveBeenCalledWith('warehouse');
  expect(connector.executeReadOnly).toHaveBeenCalledWith(
    { connectionId: 'warehouse', sql, maxRows: 5 },
    { runId: 'mcp-sql-execution' },
  );
  expect(connector.cleanup).toHaveBeenCalled();
});
|
|
|
|
// When the validator reports a failure, the port rejects with the validator's
// error text and the connector's executeReadOnly is never invoked.
it('rejects MCP SQL before connector execution when parser validation fails', async () => {
  const validationError = 'SQL contains read/write operation: Insert';
  const project = await initKtxProject({ projectDir: tempDir });
  project.config.connections.warehouse = { driver: 'postgres', url: 'env:DATABASE_URL' };

  const queryResult: KtxQueryResult = {
    headers: ['id'],
    rows: [[1]],
    totalRows: 1,
    rowCount: 1,
  };
  const connector = testConnector(testSnapshot(), queryResult);
  const sqlAnalysis = {
    analyzeForFingerprint: vi.fn(),
    analyzeBatch: vi.fn(),
    validateReadOnly: vi.fn(async () => ({ ok: false, error: validationError })),
  };
  const ports = createLocalProjectMcpContextPorts(project, {
    sqlAnalysis,
    localScan: { createConnector: vi.fn(async () => connector) },
  });

  const execution = ports.sqlExecution?.execute({
    connectionId: 'warehouse',
    sql: 'with x as (insert into t values (1) returning *) select * from x',
    maxRows: 1000,
  });

  await expect(execution).rejects.toThrow(validationError);
  expect(connector.executeReadOnly).not.toHaveBeenCalled();
});
|
|
|
|
// With a seeded scan snapshot on disk, entity details for public.orders come
// back with the requested column and the snapshot's sync/scan identifiers.
it('exposes local scan entity details through MCP ports', async () => {
  const project = await initKtxProject({ projectDir: tempDir });
  project.config.connections.warehouse = { driver: 'postgres', url: 'env:DATABASE_URL' };
  await seedScanReport(project.projectDir);

  const ports = createLocalProjectMcpContextPorts(project);
  const request = {
    connectionId: 'warehouse',
    entities: [{ table: 'public.orders', columns: ['id'] }],
  };
  const expectedEntity = {
    ok: true,
    connectionId: 'warehouse',
    display: 'public.orders',
    columns: [{ name: 'id', nativeType: 'integer' }],
    snapshot: { syncId: 'sync-1', scanRunId: 'scan-1' },
  };

  await expect(ports.entityDetails?.read(request)).resolves.toMatchObject({
    results: [expectedEntity],
  });
});
|
|
|
|
// Without any scan artifacts the read still resolves; the per-entity result
// carries ok: false and the `scan_missing` error code.
it('returns a structured local entity details error when no scan exists', async () => {
  const project = await initKtxProject({ projectDir: tempDir });
  project.config.connections.warehouse = { driver: 'postgres', url: 'env:DATABASE_URL' };

  const ports = createLocalProjectMcpContextPorts(project);
  const request = {
    connectionId: 'warehouse',
    entities: [{ table: 'public.orders' }],
  };
  const expectedFailure = {
    ok: false,
    connectionId: 'warehouse',
    error: { code: 'scan_missing' },
  };

  await expect(ports.entityDetails?.read(request)).resolves.toMatchObject({
    results: [expectedFailure],
  });
});
|
|
|
|
// Seeds a relationship profile containing sample values for orders.status and
// checks that searching for 'paid' reports a match on that column.
it('exposes local dictionary search through MCP ports', async () => {
  const project = await initKtxProject({ projectDir: tempDir });
  project.config.connections.warehouse = { driver: 'postgres', url: 'env:DATABASE_URL' };

  const profilePath = 'raw-sources/warehouse/live-database/sync-1/enrichment/relationship-profile.json';
  const profile = {
    connectionId: 'warehouse',
    driver: 'postgres',
    sqlAvailable: true,
    queryCount: 4,
    tables: [],
    columns: {
      'orders.status': {
        table: { catalog: null, db: 'public', name: 'orders' },
        column: 'status',
        nativeType: 'text',
        normalizedType: 'string',
        distinctCount: 2,
        sampleValues: ['paid', 'refunded'],
      },
    },
    warnings: [],
  };
  // Pretty-printed JSON followed by a trailing newline.
  const profileJson = JSON.stringify(profile, null, 2) + '\n';
  await project.fileStore.writeFile(profilePath, profileJson, 'ktx', 'ktx@example.com', 'Seed dictionary profile');

  const ports = createLocalProjectMcpContextPorts(project);
  const expectedMatch = {
    connectionId: 'warehouse',
    sourceName: 'orders',
    columnName: 'status',
    matchedValue: 'paid',
  };

  await expect(ports.dictionarySearch?.search({ values: ['paid'] })).resolves.toMatchObject({
    searched: [{ connectionId: 'warehouse', status: 'ready' }],
    results: [{ value: 'paid', matches: [expectedMatch], misses: [] }],
  });
});
|
|
|
|
// With no profile artifact on disk, the search resolves with empty coverage
// and a `no_profile_artifact` status/miss for the connection.
it('reports missing local dictionary profiles through MCP ports', async () => {
  const project = await initKtxProject({ projectDir: tempDir });
  project.config.connections.warehouse = { driver: 'postgres', url: 'env:DATABASE_URL' };

  const ports = createLocalProjectMcpContextPorts(project);

  const emptyCoverage = {
    sampledRows: null,
    valuesPerColumn: null,
    profiledColumns: 0,
    syncId: null,
    profiledAt: null,
  };
  const expected = {
    searched: [
      {
        connectionId: 'warehouse',
        coverage: emptyCoverage,
        status: 'no_profile_artifact',
      },
    ],
    results: [
      {
        value: 'paid',
        matches: [],
        misses: [{ connectionId: 'warehouse', reason: 'no_profile_artifact' }],
      },
    ],
  };

  await expect(ports.dictionarySearch?.search({ values: ['paid'] })).resolves.toEqual(expected);
});
|
|
|
|
// Seeds a wiki page, a semantic-layer source and a raw scan snapshot that all
// mention paid orders, then checks that one discover search returns a hit of
// each kind (wiki, sl_source, table).
it('exposes local project discover_data across wiki, semantic-layer, and raw schema', async () => {
  const project = await initKtxProject({ projectDir: tempDir });
  project.config.connections.warehouse = { driver: 'postgres', url: 'env:DATABASE_URL' };

  // Every fixture is committed with the same author identity.
  const seed = (path: string, content: string, message: string) =>
    project.fileStore.writeFile(path, content, 'ktx', 'ktx@example.com', message);

  const wikiPage = [
    '---',
    'summary: Paid order operations',
    'tags: [orders]',
    'refs: []',
    'sl_refs: []',
    'usage_mode: auto',
    '---',
    '',
    'Paid orders are used for customer activity analysis.',
    '',
  ].join('\n');
  await seed('wiki/global/orders-playbook.md', wikiPage, 'seed wiki');

  // Nested keys use two-space block indentation so that `user`, the column
  // attributes and the measure attributes nest under their parents.
  const slSource = [
    'name: orders',
    'descriptions:',
    '  user: Paid order facts',
    'table: public.orders',
    'grain: [id]',
    'columns:',
    '  - name: status',
    '    type: string',
    '    descriptions:',
    '      user: Payment status',
    'measures:',
    '  - name: order_count',
    '    expr: count(*)',
    '    description: Number of paid orders',
    '',
  ].join('\n');
  await seed('semantic-layer/warehouse/orders.yaml', slSource, 'seed sl');

  const rawRoot = 'raw-sources/warehouse/live-database/sync-1';
  await seed(
    `${rawRoot}/connection.json`,
    JSON.stringify({ connectionId: 'warehouse', driver: 'postgres', extractedAt: '2026-05-14T09:00:00.000Z' }, null, 2),
    'seed connection',
  );

  const ordersTable = {
    catalog: null,
    db: 'public',
    name: 'orders',
    kind: 'table',
    comment: 'Orders table',
    estimatedRows: 10,
    columns: [
      {
        name: 'status',
        nativeType: 'text',
        normalizedType: 'text',
        dimensionType: 'string',
        nullable: false,
        primaryKey: false,
        comment: 'Order status',
        sampleValues: ['paid'],
      },
    ],
    foreignKeys: [],
  };
  await seed(`${rawRoot}/tables/public-orders.json`, JSON.stringify(ordersTable, null, 2), 'seed table');

  const scanReport = {
    connectionId: 'warehouse',
    driver: 'postgres',
    syncId: 'sync-1',
    runId: 'scan-1',
    trigger: 'mcp',
    mode: 'enriched',
    dryRun: false,
    artifactPaths: {
      rawSourcesDir: rawRoot,
      reportPath: `${rawRoot}/scan-report.json`,
      manifestShards: [],
      enrichmentArtifacts: [],
    },
    diffSummary: {
      tablesAdded: 1,
      tablesModified: 0,
      tablesDeleted: 0,
      tablesUnchanged: 0,
      columnsAdded: 0,
      columnsModified: 0,
      columnsDeleted: 0,
    },
    manifestShardsWritten: 0,
    structuralSyncStats: {
      tablesCreated: 0,
      tablesUpdated: 0,
      tablesDeleted: 0,
      columnsCreated: 0,
      columnsUpdated: 0,
      columnsDeleted: 0,
    },
    enrichment: {
      dataDictionary: 'completed',
      tableDescriptions: 'completed',
      columnDescriptions: 'completed',
      embeddings: 'skipped',
      deterministicRelationships: 'skipped',
      llmRelationshipValidation: 'skipped',
      statisticalValidation: 'skipped',
    },
    capabilityGaps: [],
    warnings: [],
    relationships: { accepted: 0, review: 0, rejected: 0, skipped: 0 },
    enrichmentState: { resumedStages: [], completedStages: [], failedStages: [] },
    createdAt: '2026-05-14T09:00:00.000Z',
  };
  await seed(`${rawRoot}/scan-report.json`, JSON.stringify(scanReport, null, 2), 'seed scan report');

  const ports = createLocalProjectMcpContextPorts(project);
  const results = await ports.discover?.search({ query: 'paid orders', connectionId: 'warehouse', limit: 10 });

  // Order and any extra hits are not asserted; only the presence of each kind.
  expect(results).toEqual(
    expect.arrayContaining([
      expect.objectContaining({ kind: 'wiki', id: 'orders-playbook' }),
      expect.objectContaining({ kind: 'sl_source', id: 'orders', connectionId: 'warehouse' }),
      expect.objectContaining({ kind: 'table', id: 'public.orders', connectionId: 'warehouse' }),
    ]),
  );
});
|
|
|
|
// End-to-end local ingest with the fake adapter: trigger a run, then read its
// status (looked up by job id), report (by report id) and replay (by run id).
it('triggers canonical bundle ingest and reads status, report, and replay through MCP ports', async () => {
  const jobId = 'mcp-full-1';
  const project = await initKtxProject({ projectDir: tempDir });
  project.config.connections.warehouse = { driver: 'postgres' };
  project.config.ingest.adapters = ['fake'];
  project.config.ingest.embeddings = { backend: 'deterministic', dimensions: 8, batchSize: 64 };
  project.config.llm = { provider: { backend: 'none' }, models: {} };

  // One raw file for the fake adapter to pick up.
  const sourceDir = join(tempDir, 'source');
  await mkdir(join(sourceDir, 'orders'), { recursive: true });
  await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8');

  const agentRunner = new TestAgentRunner();
  const ports = createLocalProjectMcpContextPorts(project, {
    localIngest: {
      adapters: [new FakeSourceAdapter()],
      jobIdFactory: () => jobId,
      agentRunner,
    },
  });

  const started = await ports.ingest?.trigger({
    adapter: 'fake',
    connectionId: 'warehouse',
    trigger: 'manual_resync',
    config: { sourceDir },
  });

  expect(started).toMatchObject({
    runId: expect.any(String),
    jobId,
    reportId: expect.any(String),
  });
  // The run id must be distinct from the job id produced by the factory.
  expect(started?.runId).not.toBe(jobId);
  expect(agentRunner.runLoop).toHaveBeenCalledTimes(1);

  const status = ports.ingest?.status({ runId: started?.jobId ?? '' });
  await expect(status).resolves.toMatchObject({
    runId: started?.runId,
    jobId,
    reportId: started?.reportId,
    status: 'done',
    stage: 'done',
    progress: 1,
    done: true,
    adapter: 'fake',
    connectionId: 'warehouse',
    sourceDir: null,
    diffSummary: { added: 1, modified: 0, deleted: 0, unchanged: 0 },
    rawFileCount: 1,
    workUnitCount: 1,
    workUnits: [
      {
        unitKey: 'fake-orders',
        rawFiles: ['orders/orders.json'],
        peerFileIndex: [],
        dependencyPaths: [],
      },
    ],
    evictionDeletedRawPaths: [],
    errors: [],
  });

  const report = ports.ingest?.report?.({ runId: started?.reportId ?? '' });
  await expect(report).resolves.toMatchObject({
    id: started?.reportId,
    runId: started?.runId,
    jobId,
    connectionId: 'warehouse',
    sourceKey: 'fake',
  });

  const replay = (await ports.ingest?.replay?.({ runId: started?.runId ?? '' })) as MemoryFlowReplayInput | null;
  expect(replay).toMatchObject({
    runId: started?.runId,
    reportId: started?.reportId,
    reportPath: started?.reportId,
    status: 'done',
    adapter: 'fake',
    connectionId: 'warehouse',
    syncId: expect.stringContaining(jobId),
  });

  const expectedEvents = [
    { type: 'work_unit_finished', unitKey: 'fake-orders', status: 'success' },
    { type: 'report_created', runId: started?.runId, reportPath: started?.reportId },
  ];
  expect(replay?.events).toEqual(expect.arrayContaining(expectedEvents));
});
|
|
|
|
// A Metabase trigger fans out to one child ingest per mapped warehouse; the
// port reports a synthetic parent run id and per-child run/job/report ids.
it('returns child run metadata for local Metabase fan-out triggers', async () => {
  const metabaseConnectionId = 'prod-metabase';
  const project = await initKtxProject({ projectDir: tempDir });
  project.config.connections = {
    'prod-metabase': {
      driver: 'metabase',
      api_url: 'https://metabase.example.com',
    },
    warehouse_a: { driver: 'postgres', url: 'postgres://localhost/a' },
    warehouse_b: { driver: 'postgres', url: 'postgres://localhost/b' },
  };
  project.config.ingest.adapters = ['metabase'];

  // Fresh object per use so no diff summary instance is shared between fixtures.
  const oneAdded = () => ({ added: 1, modified: 0, deleted: 0, unchanged: 0 });

  const warehouseAReport = {
    id: 'report-a',
    runId: 'run-a',
    jobId: 'child-a',
    connectionId: 'warehouse_a',
    sourceKey: 'metabase',
    createdAt: '2026-05-04T12:00:00.000Z',
    body: {
      syncId: 'sync-a',
      diffSummary: oneAdded(),
      commitSha: null,
      workUnits: [],
      failedWorkUnits: [],
      reconciliationSkipped: false,
      conflictsResolved: [],
      evictionsApplied: [],
      unmappedFallbacks: [],
      evictionInputs: [],
      unresolvedCards: [],
      supersededBy: null,
      overrideOf: null,
      provenanceRows: [],
      toolTranscripts: [],
    },
  };
  // Same report shape for the second warehouse, with its own identifiers.
  const warehouseBReport = {
    ...warehouseAReport,
    id: 'report-b',
    runId: 'run-b',
    jobId: 'child-b',
    connectionId: 'warehouse_b',
    body: { ...warehouseAReport.body, syncId: 'sync-b' },
  };

  const ports = createLocalProjectMcpContextPorts(project, {
    localIngest: {
      runLocalMetabaseIngest: async () => ({
        metabaseConnectionId,
        status: 'all_succeeded',
        totals: { workUnits: 2, failedWorkUnits: 0 },
        children: [
          {
            jobId: 'child-a',
            metabaseConnectionId,
            metabaseDatabaseId: 1,
            targetConnectionId: 'warehouse_a',
            result: {
              jobId: 'child-a',
              runId: 'run-a',
              syncId: 'sync-a',
              diffSummary: oneAdded(),
              workUnitCount: 0,
              failedWorkUnits: [],
              artifactsWritten: 0,
              commitSha: null,
            },
            report: warehouseAReport,
          },
          {
            jobId: 'child-b',
            metabaseConnectionId,
            metabaseDatabaseId: 2,
            targetConnectionId: 'warehouse_b',
            result: {
              jobId: 'child-b',
              runId: 'run-b',
              syncId: 'sync-b',
              diffSummary: oneAdded(),
              workUnitCount: 0,
              failedWorkUnits: [],
              artifactsWritten: 0,
              commitSha: null,
            },
            report: warehouseBReport,
          },
        ],
      }),
    },
  });

  const triggered = ports.ingest?.trigger({
    adapter: 'metabase',
    connectionId: metabaseConnectionId,
    trigger: 'manual_resync',
  });

  await expect(triggered).resolves.toEqual({
    runId: 'metabase-fanout:prod-metabase',
    jobId: undefined,
    reportId: undefined,
    fanout: {
      status: 'all_succeeded',
      children: [
        {
          runId: 'run-a',
          jobId: 'child-a',
          reportId: 'report-a',
          targetConnectionId: 'warehouse_a',
          metabaseDatabaseId: 1,
        },
        {
          runId: 'run-b',
          jobId: 'child-b',
          reportId: 'report-b',
          targetConnectionId: 'warehouse_b',
          metabaseDatabaseId: 2,
        },
      ],
    },
  });
});
|
|
|
|
// Round-trips one global wiki page: write, read back, then find it through a
// lexical search. Finally checks that the project's .ktx/db.sqlite exists.
it('writes, reads, and searches global wiki pages', async () => {
  const project = await initKtxProject({ projectDir: tempDir });
  const ports = createLocalProjectMcpContextPorts(project);

  const userId = 'local-user';
  const page = {
    key: 'revenue',
    summary: 'Revenue definition',
    content: '# Revenue\n\nRevenue is net of refunds.',
    tags: ['finance'],
    refs: ['docs/revenue.md'],
    slRefs: ['warehouse.orders'],
  };

  const written = ports.knowledge?.write({ userId, ...page });
  await expect(written).resolves.toMatchObject({ success: true, key: 'revenue', action: 'created' });

  const stored = ports.knowledge?.read({ userId, key: page.key });
  await expect(stored).resolves.toMatchObject({
    key: 'revenue',
    scope: 'GLOBAL',
    summary: 'Revenue definition',
    tags: ['finance'],
    refs: ['docs/revenue.md'],
    slRefs: ['warehouse.orders'],
    content: '# Revenue\n\nRevenue is net of refunds.',
  });

  const search = await ports.knowledge?.search({ userId, query: 'refunds', limit: 5 });
  const expectedHit = expect.objectContaining({
    key: 'revenue',
    path: 'wiki/global/revenue.md',
    scope: 'GLOBAL',
    summary: 'Revenue definition',
    score: expect.any(Number),
    matchReasons: expect.arrayContaining(['lexical']),
  });
  expect(search).toEqual({ results: [expectedHit], totalFound: 1 });
  expect(search?.results[0]?.score).toBeGreaterThan(0);

  const databasePath = join(project.projectDir, '.ktx', 'db.sqlite');
  await expect(access(databasePath)).resolves.toBeUndefined();
});
|
|
|
|
// Covers the semantic-layer port surface on one source: write, plain listing,
// query listing (adds score/matchReasons), read as YAML, and validation.
it('writes, lists, reads, and validates semantic-layer sources', async () => {
  const connectionId = 'warehouse';
  const project = await initKtxProject({ projectDir: tempDir });
  const ports = createLocalProjectMcpContextPorts(project);

  const written = ports.semanticLayer?.writeSource({
    connectionId,
    sourceName: 'orders',
    source: {
      name: 'orders',
      table: 'public.orders',
      grain: ['id'],
      columns: [{ name: 'id', type: 'number' }],
      joins: [],
      measures: [{ name: 'order_count', expr: 'count(*)' }],
    },
  });
  await expect(written).resolves.toMatchObject({ success: true, sourceName: 'orders' });

  // Summary row expected for the single stored source.
  const listing = {
    connectionId: 'warehouse',
    connectionName: 'warehouse',
    name: 'orders',
    columnCount: 1,
    measureCount: 1,
    joinCount: 0,
  };

  await expect(ports.semanticLayer?.listSources({ connectionId })).resolves.toEqual({
    sources: [listing],
    totalSources: 1,
  });

  const searched = ports.semanticLayer?.listSources({ connectionId, query: 'order_count' });
  await expect(searched).resolves.toEqual({
    sources: [
      expect.objectContaining({
        ...listing,
        score: expect.any(Number),
        matchReasons: expect.arrayContaining(['lexical']),
      }),
    ],
    totalSources: 1,
  });
  await expect(access(join(project.projectDir, '.ktx/db.sqlite'))).resolves.toBeUndefined();

  const readBack = ports.semanticLayer?.readSource({ connectionId, sourceName: 'orders' });
  await expect(readBack).resolves.toMatchObject({
    sourceName: 'orders',
    yaml: expect.stringContaining('name: orders'),
  });

  await expect(ports.semanticLayer?.validate({ connectionId })).resolves.toEqual({
    success: true,
    errors: [],
    warnings: ['Local stdio validation checks YAML shape only; Python semantic validation is not configured.'],
  });
});
|
|
|
|
// A query that only matches a profiled sample value ('paid' in orders.status)
// surfaces the source with a `dictionary` match reason and the matching values.
it('returns semantic-layer hybrid search metadata through local project ports', async () => {
  const project = await initKtxProject({ projectDir: tempDir });

  // Column attributes are indented under their list item (two-space block
  // indentation) so each `type` belongs to the preceding `name`.
  await writeLocalSlSource(project, {
    connectionId: 'warehouse',
    sourceName: 'orders',
    yaml: [
      'name: orders',
      'table: public.orders',
      'grain:',
      '  - order_id',
      'columns:',
      '  - name: order_id',
      '    type: string',
      '  - name: status',
      '    type: string',
      '',
    ].join('\n'),
  });

  const profile = {
    connectionId: 'warehouse',
    driver: 'postgres',
    sqlAvailable: true,
    queryCount: 2,
    tables: [],
    columns: {
      'orders.status': {
        table: { catalog: null, db: 'public', name: 'orders' },
        column: 'status',
        nativeType: 'text',
        normalizedType: 'string',
        rowCount: 10,
        nullCount: 0,
        distinctCount: 2,
        uniquenessRatio: 0.2,
        nullRate: 0,
        sampleValues: ['paid', 'refunded'],
        minTextLength: 4,
        maxTextLength: 8,
      },
    },
    warnings: [],
  };
  await project.fileStore.writeFile(
    'raw-sources/warehouse/live-database/sync-1/enrichment/relationship-profile.json',
    `${JSON.stringify(profile, null, 2)}\n`,
    'ktx',
    'ktx@example.com',
    'Seed dictionary profile',
  );

  const ports = createLocalProjectMcpContextPorts(project);
  const listed = ports.semanticLayer?.listSources({ connectionId: 'warehouse', query: 'paid' });

  await expect(listed).resolves.toEqual({
    sources: [
      expect.objectContaining({
        connectionId: 'warehouse',
        connectionName: 'warehouse',
        name: 'orders',
        score: expect.any(Number),
        matchReasons: expect.arrayContaining(['dictionary']),
        dictionaryMatches: [{ column: 'status', values: ['paid'] }],
      }),
    ],
    totalSources: 1,
  });
});
|
|
|
|
// Seeds a schema manifest shard whose `orders` table carries usage metadata,
// then checks that a query matching the usage narrative returns the source
// with its frequency tier and a highlighted (<mark>) snippet.
it('returns historic SQL usage frequency and snippet through semantic-layer list search', async () => {
  const project = await initKtxProject({ projectDir: tempDir });

  // The shard is written as explicitly indented lines so the nesting
  // (tables -> orders -> usage / columns) cannot be lost to whitespace.
  // NOTE(review): nesting reconstructed from the key names — confirm against
  // the manifest shard schema.
  const manifestShard = [
    'tables:',
    '  orders:',
    '    table: public.orders',
    '    usage:',
    '      narrative: Analysts inspect paid order lifecycle by customer segment.',
    '      frequencyTier: high',
    '      commonFilters:',
    '        - status',
    '      commonGroupBys:',
    '        - customer_segment',
    '      commonJoins:',
    '        - table: public.customers',
    '          on:',
    '            - customer_id',
    '    columns:',
    '      - name: order_id',
    '        type: string',
    '      - name: status',
    '        type: string',
    '',
  ].join('\n');

  await project.fileStore.writeFile(
    'semantic-layer/warehouse/_schema/public.yaml',
    manifestShard,
    'ktx',
    'ktx@example.com',
    'Seed usage-backed manifest shard',
  );

  const ports = createLocalProjectMcpContextPorts(project);
  const listed = ports.semanticLayer?.listSources({ connectionId: 'warehouse', query: 'paid order lifecycle' });

  await expect(listed).resolves.toEqual({
    sources: [
      expect.objectContaining({
        connectionId: 'warehouse',
        connectionName: 'warehouse',
        name: 'orders',
        frequencyTier: 'high',
        snippet: expect.stringContaining('<mark>'),
        score: expect.any(Number),
        matchReasons: expect.arrayContaining(['lexical']),
      }),
    ],
    totalSources: 1,
  });
});
|
|
|
|
// Injects a stub embedding service in which the query text and the source's
// description map to the same vector, so the source can only be found through
// the semantic lane (the query shares no words with the source).
it('uses configured local embeddings for semantic-layer search when available', async () => {
  const project = await initKtxProject({ projectDir: tempDir });
  project.config.ingest.embeddings = { backend: 'none', dimensions: 2 };

  // Nested keys use two-space block indentation so `user` nests under
  // `descriptions` and `type` under its column item.
  await writeLocalSlSource(project, {
    connectionId: 'warehouse',
    sourceName: 'orders',
    yaml: [
      'name: orders',
      'descriptions:',
      '  user: Revenue facts',
      'table: public.orders',
      'grain:',
      '  - order_id',
      'columns:',
      '  - name: order_id',
      '    type: string',
      '',
    ].join('\n'),
  });

  const ports = createLocalProjectMcpContextPorts(project, {
    embeddingService: {
      maxBatchSize: 8,
      // Single-text embedding: [1, 0] only for text containing 'cash collection'.
      async computeEmbedding(text: string) {
        return text.includes('cash collection') ? [1, 0] : [0, 1];
      },
      // Bulk embedding: [1, 0] only for text containing 'Revenue facts'.
      async computeEmbeddingsBulk(texts: string[]) {
        return texts.map((text) => (text.includes('Revenue facts') ? [1, 0] : [0, 1]));
      },
    },
  });

  const result = await ports.semanticLayer?.listSources({ connectionId: 'warehouse', query: 'cash collection' });

  expect(result?.sources[0]).toMatchObject({
    name: 'orders',
    matchReasons: expect.arrayContaining(['semantic']),
    lanes: expect.arrayContaining([expect.objectContaining({ lane: 'semantic', status: 'available' })]),
  });
});
|
|
|
|
// Checks that both the knowledge port and the semantic-layer port reject identifiers
// containing '../' with their respective error messages.
it('rejects path traversal keys before touching the project directory', async () => {
  const ktxProject = await initKtxProject({ projectDir: tempDir });
  const contextPorts = createLocalProjectMcpContextPorts(ktxProject);

  // Wiki read with a traversal-style key.
  const wikiRead = contextPorts.knowledge?.read({ userId: 'local-user', key: '../outside' });
  await expect(wikiRead).rejects.toThrow('Invalid wiki key "../outside". Wiki keys must be flat; use "outside".');

  // Semantic-layer read with a traversal-style source name.
  const sourceRead = contextPorts.semanticLayer?.readSource({
    connectionId: 'warehouse',
    sourceName: '../orders',
  });
  await expect(sourceRead).rejects.toThrow('Unsafe semantic-layer source name');
});
|
|
|
|
// Checks that, when a semantic-layer compute stub is supplied, `validate` forwards the stored
// source to `validateSources`, and `query` returns the compiled SQL with no rows and a
// 'compile_only' execution plan.
it('uses semantic compute for validation and compile-only sl_query when supplied', async () => {
  const ktxProject = await initKtxProject({ projectDir: tempDir });
  ktxProject.config.connections.warehouse = { driver: 'postgres', url: 'env:DATABASE_URL' };

  // Seed the `orders` source through ports created without any options.
  const seedingPorts = createLocalProjectMcpContextPorts(ktxProject);
  await seedingPorts.semanticLayer?.writeSource({
    connectionId: 'warehouse',
    sourceName: 'orders',
    source: {
      name: 'orders',
      table: 'public.orders',
      grain: ['id'],
      columns: [
        { name: 'id', type: 'number' },
        { name: 'status', type: 'string' },
      ],
      joins: [],
      measures: [{ name: 'order_count', expr: 'count(*)' }],
    },
  });

  const compiledSql = 'select status, count(*) as order_count from public.orders group by status';
  const computeStub = {
    validateSources: vi.fn(async () => ({
      valid: true,
      errors: [],
      warnings: ['python validation ran'],
      perSourceWarnings: {},
    })),
    query: vi.fn(async () => ({
      sql: compiledSql,
      dialect: 'postgres',
      columns: [{ name: 'orders.status' }, { name: 'orders.order_count' }],
      plan: { sources_used: ['orders'] },
    })),
    generateSources: vi.fn(),
  };
  const contextPorts = createLocalProjectMcpContextPorts(ktxProject, { semanticLayerCompute: computeStub });

  // Validation goes through the compute stub and surfaces its warnings.
  const validation = contextPorts.semanticLayer?.validate({ connectionId: 'warehouse', names: ['orders'] });
  await expect(validation).resolves.toEqual({
    success: true,
    errors: [],
    warnings: ['python validation ran'],
  });

  const storedOrdersSource = {
    name: 'orders',
    table: 'public.orders',
    grain: ['id'],
    columns: [
      { name: 'id', type: 'number' },
      { name: 'status', type: 'string' },
    ],
    joins: [],
    measures: [{ name: 'order_count', expr: 'count(*)' }],
  };
  expect(computeStub.validateSources).toHaveBeenCalledWith({
    sources: [storedOrdersSource],
    dialect: 'postgres',
    recentlyTouched: ['orders'],
  });

  // No query executor is configured here, so the query result carries SQL but no data.
  const compiled = contextPorts.semanticLayer?.query({
    connectionId: 'warehouse',
    query: {
      measures: ['orders.order_count'],
      dimensions: ['orders.status'],
    },
  });
  await expect(compiled).resolves.toMatchObject({
    sql: compiledSql,
    headers: ['orders.status', 'orders.order_count'],
    rows: [],
    totalRows: 0,
    plan: {
      sources_used: ['orders'],
      execution: {
        mode: 'compile_only',
        reason: 'Local semantic-layer query compiled SQL but no data-source execution adapter is configured.',
      },
    },
  });
});
|
|
|
|
// Checks that, with both a compute stub and a query executor supplied, `query` returns the
// executor's rows and the executor receives the connection id plus the query limit as maxRows.
it('executes local MCP sl_query when a query executor is configured', async () => {
  const ktxProject = await initKtxProject({ projectDir: tempDir });
  ktxProject.config.connections.warehouse = { driver: 'postgres', url: 'env:DATABASE_URL' };

  // Seed the `orders` source through ports created without any options.
  const seedingPorts = createLocalProjectMcpContextPorts(ktxProject);
  await seedingPorts.semanticLayer?.writeSource({
    connectionId: 'warehouse',
    sourceName: 'orders',
    source: {
      name: 'orders',
      table: 'public.orders',
      grain: ['id'],
      columns: [{ name: 'id', type: 'number' }],
      joins: [],
      measures: [{ name: 'order_count', expr: 'count(*)' }],
    },
  });

  const computeStub = {
    validateSources: vi.fn(),
    generateSources: vi.fn(),
    query: vi.fn(async () => ({
      sql: 'select count(*) as order_count from public.orders',
      dialect: 'postgres',
      columns: [{ name: 'orders.order_count' }],
      plan: {},
    })),
  };
  const executorStub = {
    execute: vi.fn(async () => ({
      headers: ['orders.order_count'],
      rows: [[3]],
      totalRows: 1,
      command: 'SELECT',
      rowCount: 1,
    })),
  };
  const contextPorts = createLocalProjectMcpContextPorts(ktxProject, {
    semanticLayerCompute: computeStub,
    queryExecutor: executorStub,
  });

  const outcome = await contextPorts.semanticLayer?.query({
    connectionId: 'warehouse',
    query: { measures: ['orders.order_count'], dimensions: [], limit: 5 },
  });

  expect(outcome?.rows).toEqual([[3]]);
  expect(outcome?.totalRows).toBe(1);

  const expectedExecutorInput = expect.objectContaining({ connectionId: 'warehouse', maxRows: 5 });
  expect(executorStub.execute).toHaveBeenCalledWith(expectedExecutorInput);
});
|
|
|
|
// Runs the same manual resync twice against a fake adapter and checks the trigger result and
// the detailed status of each run: the first reports one added file and one work unit, the
// second reports the file as unchanged with no work units, and the agent loop ran only once.
it('exposes detailed local ingest trigger and status ports when local ingest is enabled', async () => {
  const ktxProject = await initKtxProject({ projectDir: tempDir });
  ktxProject.config.connections.warehouse = { driver: 'postgres' };
  ktxProject.config.ingest.adapters = ['fake'];
  ktxProject.config.ingest.embeddings = { backend: 'deterministic', dimensions: 8, batchSize: 64 };
  ktxProject.config.llm = { provider: { backend: 'none' }, models: {} };

  // Stage a single raw file for the fake adapter to pick up.
  const uploadDir = join(ktxProject.projectDir, 'upload');
  const ordersDir = join(uploadDir, 'orders');
  await mkdir(ordersDir, { recursive: true });
  await writeFile(join(ordersDir, 'orders.json'), '{"name":"orders"}\n', 'utf-8');

  let issuedJobs = 0;
  const runner = new TestAgentRunner();
  const contextPorts = createLocalProjectMcpContextPorts(ktxProject, {
    localIngest: {
      adapters: [new FakeSourceAdapter()],
      jobIdFactory: () => {
        issuedJobs += 1;
        return `mcp-local-run-${issuedJobs}`;
      },
      agentRunner: runner,
    },
  });

  // Both runs use identical trigger arguments; a fresh argument object is built per call.
  const triggerResync = () =>
    contextPorts.ingest?.trigger({
      adapter: 'fake',
      connectionId: 'warehouse',
      trigger: 'manual_resync',
      config: { sourceDir: uploadDir },
    });

  // Status fields that both completed runs are expected to share.
  const finishedStatus = (jobId: string, runId: unknown, reportId: unknown) => ({
    runId,
    jobId,
    reportId,
    status: 'done',
    stage: 'done',
    done: true,
    progress: 1,
    adapter: 'fake',
    connectionId: 'warehouse',
    sourceDir: null,
    syncId: expect.stringContaining(jobId),
    startedAt: expect.any(String),
    completedAt: expect.any(String),
    previousRunId: null,
    evictionDeletedRawPaths: [],
    errors: [],
  });

  // First run: the staged file is new.
  const firstTrigger = await triggerResync();
  expect(firstTrigger).toMatchObject({
    runId: expect.any(String),
    jobId: 'mcp-local-run-1',
    reportId: expect.any(String),
  });
  expect(firstTrigger?.runId).not.toBe('mcp-local-run-1');

  const firstStatus = contextPorts.ingest?.status({ runId: 'mcp-local-run-1' });
  await expect(firstStatus).resolves.toMatchObject({
    ...finishedStatus('mcp-local-run-1', firstTrigger?.runId, firstTrigger?.reportId),
    diffSummary: { added: 1, modified: 0, deleted: 0, unchanged: 0 },
    rawFileCount: 1,
    workUnitCount: 1,
    workUnits: [
      {
        unitKey: 'fake-orders',
        rawFiles: ['orders/orders.json'],
        peerFileIndex: [],
        dependencyPaths: [],
      },
    ],
  });

  // Second run: nothing changed on disk.
  const secondTrigger = await triggerResync();
  expect(secondTrigger).toMatchObject({
    runId: expect.any(String),
    jobId: 'mcp-local-run-2',
    reportId: expect.any(String),
  });
  expect(secondTrigger?.runId).not.toBe('mcp-local-run-2');

  const secondStatus = contextPorts.ingest?.status({ runId: 'mcp-local-run-2' });
  await expect(secondStatus).resolves.toMatchObject({
    ...finishedStatus('mcp-local-run-2', secondTrigger?.runId, secondTrigger?.reportId),
    diffSummary: { added: 0, modified: 0, deleted: 0, unchanged: 1 },
    rawFileCount: 0,
    workUnitCount: 0,
    workUnits: [],
  });

  expect(runner.runLoop).toHaveBeenCalledTimes(1);
});
|
|
|
|
// Checks that `pullConfigOptions` given to the local ingest options reach the injected
// `runLocalIngest`, and that the trigger result mirrors the ids of the report it returns.
it('passes local ingest pull-config options into runLocalIngest', async () => {
  const ktxProject = await initKtxProject({ projectDir: tempDir });
  ktxProject.config.connections.warehouse = { driver: 'postgres' };
  ktxProject.config.ingest.adapters = ['looker'];

  // Stubbed ingest run; a fresh report object is built on every invocation.
  const runLocalIngest = vi.fn(async () => {
    const report = {
      id: 'report-1',
      runId: 'run-1',
      jobId: 'job-1',
      sourceKey: 'looker',
      connectionId: 'warehouse',
      body: {
        syncId: 'sync-1',
        workUnits: [],
        failedWorkUnits: [],
        diffSummary: { added: 0, modified: 0, deleted: 0, unchanged: 0 },
        provenanceRows: [],
      },
    };
    return { result: { ok: true }, report } as never;
  });

  const daemonBaseUrl = 'http://127.0.0.1:61234';
  const contextPorts = createLocalProjectMcpContextPorts(ktxProject, {
    localIngest: {
      adapters: [
        {
          source: 'looker',
          skillNames: [],
          detect: async () => true,
          chunk: async () => ({ workUnits: [] }),
        },
      ],
      pullConfigOptions: { looker: { daemonBaseUrl } },
      runLocalIngest,
    },
  });

  const triggered = contextPorts.ingest?.trigger({
    adapter: 'looker',
    connectionId: 'warehouse',
    trigger: 'manual_resync',
    config: {},
  });
  await expect(triggered).resolves.toMatchObject({
    runId: 'run-1',
    jobId: 'job-1',
    reportId: 'report-1',
  });

  const expectedIngestInput = expect.objectContaining({
    pullConfigOptions: { looker: { daemonBaseUrl } },
  });
  expect(runLocalIngest).toHaveBeenCalledWith(expectedIngestInput);
});
|
|
|
|
// Checks that an adapter exposing `fetch` can be triggered with an empty config: the adapter
// stages its own files, and the resulting status reports a null sourceDir with one raw file
// and one work unit.
it('triggers fetch-capable local ingest without sourceDir config', async () => {
  const ktxProject = await initKtxProject({ projectDir: tempDir });
  ktxProject.config.connections.warehouse = {
    driver: 'postgres',
    url: 'postgres://localhost:5432/warehouse',
  };
  ktxProject.config.ingest.adapters = ['live-database'];
  ktxProject.config.llm = { provider: { backend: 'none' }, models: {} };

  const runner = new TestAgentRunner();
  const contextPorts = createLocalProjectMcpContextPorts(ktxProject, {
    localIngest: {
      adapters: [
        {
          source: 'live-database',
          skillNames: ['live_database_ingest'],
          // Writes the staged snapshot: connection metadata, foreign keys, and one table file.
          fetch: async (_pullConfig, stagedDir) => {
            const tablesDir = join(stagedDir, 'tables');
            await mkdir(tablesDir, { recursive: true });
            await writeFile(join(stagedDir, 'connection.json'), '{"connectionId":"warehouse"}\n', 'utf-8');
            await writeFile(join(stagedDir, 'foreign-keys.json'), '{"foreignKeys":[]}\n', 'utf-8');
            await writeFile(
              join(tablesDir, 'orders.json'),
              '{"name":"orders","db":"public","columns":[]}\n',
              'utf-8',
            );
          },
          detect: async () => true,
          chunk: async () => ({
            workUnits: [
              {
                unitKey: 'live-database-public-orders',
                rawFiles: ['tables/orders.json'],
                dependencyPaths: ['connection.json', 'foreign-keys.json'],
                peerFileIndex: [],
              },
            ],
          }),
        },
      ],
      jobIdFactory: () => 'local-live-db-mcp',
      agentRunner: runner,
    },
  });

  const triggered = await contextPorts.ingest?.trigger({
    adapter: 'live-database',
    connectionId: 'warehouse',
    trigger: 'manual_resync',
    config: {},
  });

  expect(triggered).toMatchObject({
    runId: expect.any(String),
    jobId: 'local-live-db-mcp',
    reportId: expect.any(String),
  });
  expect(triggered?.runId).not.toBe('local-live-db-mcp');

  const status = contextPorts.ingest?.status({ runId: 'local-live-db-mcp' });
  await expect(status).resolves.toMatchObject({
    runId: triggered?.runId,
    jobId: 'local-live-db-mcp',
    reportId: triggered?.reportId,
    adapter: 'live-database',
    sourceDir: null,
    rawFileCount: 1,
    workUnitCount: 1,
  });
  expect(runner.runLoop).toHaveBeenCalledTimes(1);
});
|
|
|
|
// Triggers a structural scan with a fetch-capable adapter and a fixed clock, then checks that
// listArtifacts/readArtifact expose the run's raw sources, report and manifest shard, while a
// path outside the run (ktx.yaml) and an unknown run id both resolve to null.
it('lists and reads only artifacts that belong to a local scan report', async () => {
  const ktxProject = await initKtxProject({ projectDir: tempDir });
  ktxProject.config.connections.warehouse = { driver: 'postgres', url: 'env:DATABASE_URL' };
  ktxProject.config.ingest.adapters = ['live-database'];

  const contextPorts = createLocalProjectMcpContextPorts(ktxProject, {
    localScan: {
      adapters: [
        {
          source: 'live-database',
          skillNames: ['live_database_ingest'],
          // Writes the staged snapshot: connection metadata, foreign keys, and one table file.
          fetch: async (_pullConfig, stagedDir) => {
            const tablesDir = join(stagedDir, 'tables');
            await mkdir(tablesDir, { recursive: true });
            await writeFile(join(stagedDir, 'connection.json'), '{"connectionId":"warehouse"}\n', 'utf-8');
            await writeFile(join(stagedDir, 'foreign-keys.json'), '{"foreignKeys":[]}\n', 'utf-8');
            await writeFile(
              join(tablesDir, 'orders.json'),
              '{"name":"orders","db":"public","columns":[]}\n',
              'utf-8',
            );
          },
          detect: async () => true,
          chunk: async () => ({
            workUnits: [
              {
                unitKey: 'live-database-public-orders',
                rawFiles: ['tables/orders.json'],
                dependencyPaths: ['connection.json', 'foreign-keys.json'],
                peerFileIndex: [],
              },
            ],
          }),
        },
      ],
      jobIdFactory: () => 'local-scan-artifacts',
      now: () => new Date('2026-04-29T12:00:00.000Z'),
    },
  });

  const triggered = await contextPorts.scan?.trigger({
    connectionId: 'warehouse',
    mode: 'structural',
    detectRelationships: false,
    dryRun: false,
  });
  expect(triggered?.runId).toBe('local-scan-artifacts');

  const runId = 'local-scan-artifacts';
  const syncId = '2026-04-29-120000-local-scan-artifacts';
  const rawRoot = `raw-sources/warehouse/live-database/${syncId}`;
  const ordersPath = `${rawRoot}/tables/orders.json`;
  const shardPath = 'semantic-layer/warehouse/_schema/public.yaml';

  // Listing covers the staged raw files, the scan report, and the manifest shard.
  const listing = contextPorts.scan?.listArtifacts?.({ runId: 'local-scan-artifacts' });
  await expect(listing).resolves.toEqual({
    runId,
    artifacts: [
      { path: `${rawRoot}/connection.json`, type: 'raw_source', size: 29 },
      { path: `${rawRoot}/foreign-keys.json`, type: 'raw_source', size: 19 },
      { path: `${rawRoot}/scan-report.json`, type: 'report', size: expect.any(Number) },
      { path: ordersPath, type: 'raw_source', size: 45 },
      { path: shardPath, type: 'manifest_shard', size: expect.any(Number) },
    ],
  });

  // A raw source that belongs to the run is returned with its content.
  const rawRead = contextPorts.scan?.readArtifact?.({ runId: 'local-scan-artifacts', path: ordersPath });
  await expect(rawRead).resolves.toEqual({
    runId,
    path: ordersPath,
    type: 'raw_source',
    size: 45,
    content: '{"name":"orders","db":"public","columns":[]}\n',
  });

  // The manifest shard is readable through the same port.
  const shardRead = contextPorts.scan?.readArtifact?.({ runId: 'local-scan-artifacts', path: shardPath });
  await expect(shardRead).resolves.toMatchObject({
    runId,
    path: shardPath,
    type: 'manifest_shard',
    content: expect.stringContaining('orders:'),
  });

  // A project file that is not part of the run resolves to null, as does an unknown run id.
  const outsideRead = contextPorts.scan?.readArtifact?.({ runId: 'local-scan-artifacts', path: 'ktx.yaml' });
  await expect(outsideRead).resolves.toBeNull();
  await expect(contextPorts.scan?.listArtifacts?.({ runId: 'missing' })).resolves.toBeNull();
  await expect(readFile(join(ktxProject.projectDir, 'ktx.yaml'), 'utf-8')).resolves.not.toContain('project:');
});
|
|
});
|