Initial open-source release

This commit is contained in:
Andrey Avtomonov 2026-05-10 23:12:26 +02:00
commit 1a42152e6f
1199 changed files with 257054 additions and 0 deletions

View file

@ -0,0 +1,509 @@
import { z } from 'zod';
import type { KloMcpContextPorts, KloMcpServerLike, KloMcpToolResult, KloMcpUserContext } from './types.js';
/**
 * Dependencies consumed by `registerKloContextTools`.
 */
export interface RegisterKloContextToolsDeps {
/** Server on which the tools are registered (via `registerTool`). */
server: KloMcpServerLike;
/** Capability ports; a tool group is only registered when its port is present. */
ports: KloMcpContextPorts;
/** Caller identity; its `userId` is forwarded to the knowledge port calls. */
userContext: KloMcpUserContext;
}
// ---------------------------------------------------------------------------
// Input schemas for the MCP tools registered further down in this file.
// Each object schema is used twice: its `.shape` is advertised as the tool's
// `inputSchema`, and the schema itself parses the raw input before the
// handler runs.
// ---------------------------------------------------------------------------

// Shared validator: a connection id is any non-empty string.
const connectionIdSchema = z.string().min(1);
// connection_list takes no arguments.
const connectionListSchema = z.object({});
// connection_test: the connection to probe.
const connectionTestSchema = z.object({
connectionId: connectionIdSchema,
});
// knowledge_search: free-text query plus a result cap (1-50, default 10).
const knowledgeSearchSchema = z.object({
query: z.string().min(1),
limit: z.number().int().min(1).max(50).default(10),
});
// knowledge_read: page lookup by key.
const knowledgeReadSchema = z.object({
key: z.string().min(1),
});
// Usage statistics accepted as the optional `usage` field of knowledge_write.
// Runtime percentiles may be null; error_rate is constrained to [0, 1].
const historicSqlUsageFrontmatterSchema = z.object({
executions: z.number().int().nonnegative(),
distinct_users: z.number().int().nonnegative(),
first_seen: z.string().min(1),
last_seen: z.string().min(1),
p50_runtime_ms: z.number().nonnegative().nullable(),
p95_runtime_ms: z.number().nonnegative().nullable(),
error_rate: z.number().min(0).max(1),
rows_produced: z.number().int().nonnegative().optional(),
});
// knowledge_write: key (<= 120 chars), summary (<= 200 chars) and content are
// required; every other field is optional metadata.
const knowledgeWriteSchema = z.object({
key: z.string().min(1).max(120),
summary: z.string().min(1).max(200),
content: z.string().min(1),
tags: z.array(z.string()).optional(),
refs: z.array(z.string()).optional(),
sl_refs: z.array(z.string()).optional(),
source: z.string().optional(),
intent: z.string().optional(),
tables: z.array(z.string()).optional(),
representative_sql: z.string().optional(),
usage: historicSqlUsageFrontmatterSchema.optional(),
fingerprints: z.array(z.string()).optional(),
});
// sl_list_sources: both filters are optional.
const slListSourcesSchema = z.object({
connectionId: connectionIdSchema.optional(),
query: z.string().min(1).optional(),
});
// sl_read_source: a source is addressed by connection id + source name.
const slReadSourceSchema = z.object({
connectionId: connectionIdSchema,
sourceName: z.string().min(1),
});
// sl_write_source: sourceName must match the snake_case pattern below.
// `yaml`, `source` and `delete` are all optional at the schema level; the
// schema does not require that any one of them is supplied.
const slWriteSourceSchema = z.object({
connectionId: connectionIdSchema,
sourceName: z.string().regex(/^[a-z0-9][a-z0-9_]*$/, 'Source name must be snake_case'),
yaml: z.string().min(1).optional(),
source: z.record(z.string(), z.unknown()).optional(),
delete: z.boolean().optional(),
});
// sl_validate: optionally restrict validation to the given source names.
const slValidateSchema = z.object({
connectionId: connectionIdSchema,
names: z.array(z.string().min(1)).optional(),
});
// A measure is either a plain string or an { expr, name } object.
const slQueryMeasureSchema = z.union([
z.string(),
z.object({
expr: z.string().min(1),
name: z.string().min(1),
}),
]);
// A dimension is either a plain string or a { field, granularity? } object.
const slQueryDimensionSchema = z.union([
z.string(),
z.object({
field: z.string().min(1),
granularity: z.string().min(1).optional(),
}),
]);
// An ordering is either a plain string or { field, direction } (default 'asc').
const slQueryOrderBySchema = z.union([
z.string(),
z.object({
field: z.string().min(1),
direction: z.enum(['asc', 'desc']).default('asc'),
}),
]);
// sl_query: at least one measure is required; the remaining lists default to
// empty, `limit` defaults to 1000 and `include_empty` defaults to true.
const slQuerySchema = z.object({
connectionId: connectionIdSchema.optional(),
measures: z.array(slQueryMeasureSchema).min(1),
dimensions: z.array(slQueryDimensionSchema).default([]),
filters: z.array(z.string()).default([]),
segments: z.array(z.string()).default([]),
order_by: z.array(slQueryOrderBySchema).default([]),
limit: z.number().int().min(0).default(1000),
include_empty: z.boolean().default(true),
});
// ingest_trigger: `config` is left unvalidated here (z.unknown); `trigger`
// defaults to 'manual_resync'.
const ingestTriggerSchema = z.object({
adapter: z.string().min(1),
connectionId: connectionIdSchema,
config: z.unknown().optional(),
trigger: z.enum(['upload', 'scheduled_pull', 'manual_resync']).default('manual_resync'),
});
// ingest_status / ingest_report / ingest_replay all take a single run id.
const ingestStatusSchema = z.object({
runId: z.string().min(1),
});
const ingestReportSchema = z.object({
runId: z.string().min(1),
});
const ingestReplaySchema = z.object({
runId: z.string().min(1),
});
// scan_trigger: mode defaults to 'structural'; both flags default to false.
const scanTriggerSchema = z.object({
connectionId: connectionIdSchema,
mode: z.enum(['structural', 'relationships', 'enriched']).default('structural'),
detectRelationships: z.boolean().default(false),
dryRun: z.boolean().default(false),
});
// Shared by scan_status, scan_report and scan_list_artifacts.
const scanStatusSchema = z.object({
runId: z.string().min(1),
});
// scan_read_artifact: run id plus the artifact path within that run.
const scanArtifactReadSchema = z.object({
runId: z.string().min(1),
path: z.string().min(1),
});
/**
 * Builds a successful tool result from a structured payload.
 *
 * The payload is returned as `structuredContent` and is also rendered as
 * pretty-printed (2-space) JSON in a single text content entry.
 *
 * @param structuredContent - Payload to return to the caller.
 * @returns Tool result carrying both the JSON text and the original payload.
 */
export function jsonToolResult<T extends object>(structuredContent: T): KloMcpToolResult<T> {
  const text = JSON.stringify(structuredContent, null, 2);
  return { content: [{ type: 'text', text }], structuredContent };
}
/**
 * Builds an error tool result carrying a plain-text message.
 *
 * @param text - Human-readable error message.
 * @returns Tool result flagged with `isError: true` and no structured content.
 */
export function jsonErrorToolResult(text: string): KloMcpToolResult<Record<string, never>> {
  const message = { type: 'text' as const, text };
  return { content: [message], isError: true };
}
/**
 * Registers a tool whose raw input is parsed with `schema` before the handler
 * is invoked.
 *
 * `schema.parse` is called inside the async callback, so a parse failure
 * rejects the callback's promise instead of reaching `handler`.
 *
 * @param server - Server receiving the registration.
 * @param name - Tool name.
 * @param config - Title, description and advertised input schema for the tool.
 * @param schema - Zod schema used to parse the raw input.
 * @param handler - Invoked with the parsed input; produces the tool result.
 */
function registerParsedTool<TSchema extends z.ZodType>(
  server: KloMcpServerLike,
  name: string,
  config: { title: string; description: string; inputSchema: unknown },
  schema: TSchema,
  handler: (input: z.infer<TSchema>) => Promise<KloMcpToolResult>,
): void {
  server.registerTool(name, config, async (rawInput) => {
    const parsedInput = schema.parse(rawInput);
    return handler(parsedInput);
  });
}
/**
 * Registers the KLO context tools on the given MCP server.
 *
 * Tools are grouped by port (`connections`, `knowledge`, `semanticLayer`,
 * `ingest`, `scan`). A group is registered only when its port is present on
 * `deps.ports`, and tools backed by optional port methods (`connections.test`,
 * `ingest.report`, `ingest.replay`, `scan.listArtifacts`, `scan.readArtifact`)
 * are registered only when that method exists.
 *
 * Lookup-style handlers return `jsonErrorToolResult` with a "was not found"
 * message when the port returns a falsy value; all other results are wrapped
 * with `jsonToolResult`.
 *
 * @param deps - Server, ports, and user context used by the handlers.
 */
export function registerKloContextTools(deps: RegisterKloContextToolsDeps): void {
const { ports, server, userContext } = deps;
// --- Connection tools ------------------------------------------------------
if (ports.connections) {
const connections = ports.connections;
registerParsedTool(
server,
'connection_list',
{
title: 'Connection List',
description: 'List configured read-only data connections available to the KLO project.',
inputSchema: connectionListSchema.shape,
},
connectionListSchema,
async () => jsonToolResult({ connections: await connections.list() }),
);
// connection_test is only exposed when the port implements `test`.
if (connections.test) {
registerParsedTool(
server,
'connection_test',
{
title: 'Connection Test',
description: 'Test a configured standalone KLO connection through the host-provided scan connector.',
inputSchema: connectionTestSchema.shape,
},
connectionTestSchema,
async (input) => {
// Optional call: `test` was checked at registration time, but it is
// re-read from the port object on every invocation.
const result = await connections.test?.({ connectionId: input.connectionId });
return result
? jsonToolResult(result)
: jsonErrorToolResult(`Connection "${input.connectionId}" was not found.`);
},
);
}
}
// --- Knowledge tools -------------------------------------------------------
// Every knowledge call is made on behalf of `userContext.userId`.
if (ports.knowledge) {
const knowledge = ports.knowledge;
registerParsedTool(
server,
'knowledge_search',
{
title: 'Knowledge Search',
description: 'Search KLO knowledge pages and return ranked summaries.',
inputSchema: knowledgeSearchSchema.shape,
},
knowledgeSearchSchema,
async (input) =>
jsonToolResult(
await knowledge.search({
userId: userContext.userId,
query: input.query,
limit: input.limit,
}),
),
);
registerParsedTool(
server,
'knowledge_read',
{
title: 'Knowledge Read',
description: 'Read a KLO knowledge page by key.',
inputSchema: knowledgeReadSchema.shape,
},
knowledgeReadSchema,
async (input) => {
const page = await knowledge.read({ userId: userContext.userId, key: input.key });
return page ? jsonToolResult(page) : jsonErrorToolResult(`Knowledge page "${input.key}" was not found.`);
},
);
registerParsedTool(
server,
'knowledge_write',
{
title: 'Knowledge Write',
description: 'Create or replace a KLO knowledge page and its SL references.',
inputSchema: knowledgeWriteSchema.shape,
},
knowledgeWriteSchema,
async (input) =>
jsonToolResult(
// Maps the snake_case tool fields (sl_refs, representative_sql) to the
// camelCase names used by the port.
await knowledge.write({
userId: userContext.userId,
key: input.key,
summary: input.summary,
content: input.content,
tags: input.tags,
refs: input.refs,
slRefs: input.sl_refs,
source: input.source,
intent: input.intent,
tables: input.tables,
representativeSql: input.representative_sql,
usage: input.usage,
fingerprints: input.fingerprints,
}),
),
);
}
// --- Semantic-layer tools --------------------------------------------------
if (ports.semanticLayer) {
const semanticLayer = ports.semanticLayer;
registerParsedTool(
server,
'sl_list_sources',
{
title: 'Semantic Layer List Sources',
description: 'List semantic-layer sources, optionally filtered by connection or search query.',
inputSchema: slListSourcesSchema.shape,
},
slListSourcesSchema,
async (input) => jsonToolResult(await semanticLayer.listSources(input)),
);
registerParsedTool(
server,
'sl_read_source',
{
title: 'Semantic Layer Read Source',
description: 'Read a semantic-layer YAML source by connection id and source name.',
inputSchema: slReadSourceSchema.shape,
},
slReadSourceSchema,
async (input) => {
const source = await semanticLayer.readSource(input);
return source
? jsonToolResult(source)
: jsonErrorToolResult(`Semantic-layer source "${input.sourceName}" was not found.`);
},
);
registerParsedTool(
server,
'sl_write_source',
{
title: 'Semantic Layer Write Source',
description: 'Create, replace, or delete a semantic-layer source.',
inputSchema: slWriteSourceSchema.shape,
},
slWriteSourceSchema,
async (input) =>
jsonToolResult(
await semanticLayer.writeSource({
connectionId: input.connectionId,
sourceName: input.sourceName,
yaml: input.yaml,
source: input.source,
delete: input.delete,
}),
),
);
registerParsedTool(
server,
'sl_validate',
{
title: 'Semantic Layer Validate',
description: 'Validate semantic-layer sources for a connection.',
inputSchema: slValidateSchema.shape,
},
slValidateSchema,
async (input) => jsonToolResult(await semanticLayer.validate(input)),
);
registerParsedTool(
server,
'sl_query',
{
title: 'Semantic Layer Query',
description: 'Execute a semantic-layer query and return rows, headers, SQL, and the query plan.',
inputSchema: slQuerySchema.shape,
},
slQuerySchema,
async (input) =>
jsonToolResult(
// The flat tool input is split into the connection id and a nested
// `query` object for the port.
await semanticLayer.query({
connectionId: input.connectionId,
query: {
measures: input.measures,
dimensions: input.dimensions,
filters: input.filters,
segments: input.segments,
order_by: input.order_by,
limit: input.limit,
include_empty: input.include_empty,
},
}),
),
);
}
// --- Ingest tools ----------------------------------------------------------
if (ports.ingest) {
const ingest = ports.ingest;
registerParsedTool(
server,
'ingest_trigger',
{
title: 'Ingest Trigger',
description: 'Trigger a KLO ingest run for an adapter and connection.',
inputSchema: ingestTriggerSchema.shape,
},
ingestTriggerSchema,
async (input) => jsonToolResult(await ingest.trigger(input)),
);
registerParsedTool(
server,
'ingest_status',
{
title: 'Ingest Status',
description:
'Read the current or final status for an ingest run, including local diff and work-unit summaries when available.',
inputSchema: ingestStatusSchema.shape,
},
ingestStatusSchema,
async (input) => {
const status = await ingest.status(input);
return status ? jsonToolResult(status) : jsonErrorToolResult(`Ingest run "${input.runId}" was not found.`);
},
);
// ingest_report is only exposed when the port implements `report`.
if (ingest.report) {
registerParsedTool(
server,
'ingest_report',
{
title: 'Ingest Report',
description: 'Read the stored canonical KLO ingest report for a local run id, job id, or report id.',
inputSchema: ingestReportSchema.shape,
},
ingestReportSchema,
async (input) => {
const report = await ingest.report?.(input);
return report ? jsonToolResult(report) : jsonErrorToolResult(`Ingest report "${input.runId}" was not found.`);
},
);
}
// ingest_replay is only exposed when the port implements `replay`.
if (ingest.replay) {
registerParsedTool(
server,
'ingest_replay',
{
title: 'Ingest Replay',
description: 'Read the memory-flow replay snapshot for a stored canonical KLO ingest run.',
inputSchema: ingestReplaySchema.shape,
},
ingestReplaySchema,
async (input) => {
const replay = await ingest.replay?.(input);
return replay ? jsonToolResult(replay) : jsonErrorToolResult(`Ingest replay "${input.runId}" was not found.`);
},
);
}
}
// --- Scan tools ------------------------------------------------------------
if (ports.scan) {
const scan = ports.scan;
registerParsedTool(
server,
'scan_trigger',
{
title: 'Scan Trigger',
description: 'Run a standalone KLO structural connection scan and return its report summary.',
inputSchema: scanTriggerSchema.shape,
},
scanTriggerSchema,
async (input) => jsonToolResult(await scan.trigger(input)),
);
registerParsedTool(
server,
'scan_status',
{
title: 'Scan Status',
description: 'Read the current or final status for a standalone KLO scan run.',
inputSchema: scanStatusSchema.shape,
},
scanStatusSchema,
async (input) => {
const status = await scan.status(input);
return status ? jsonToolResult(status) : jsonErrorToolResult(`Scan run "${input.runId}" was not found.`);
},
);
// scan_report and scan_list_artifacts reuse scanStatusSchema: all three
// tools take only a run id.
registerParsedTool(
server,
'scan_report',
{
title: 'Scan Report',
description: 'Read a standalone KLO scan report by run id.',
inputSchema: scanStatusSchema.shape,
},
scanStatusSchema,
async (input) => {
const report = await scan.report(input);
return report ? jsonToolResult(report) : jsonErrorToolResult(`Scan report "${input.runId}" was not found.`);
},
);
// scan_list_artifacts is only exposed when the port implements `listArtifacts`.
if (scan.listArtifacts) {
registerParsedTool(
server,
'scan_list_artifacts',
{
title: 'Scan List Artifacts',
description: 'List report, raw-source, manifest, and enrichment artifact paths for a standalone KLO scan run.',
inputSchema: scanStatusSchema.shape,
},
scanStatusSchema,
async (input) => {
const result = await scan.listArtifacts?.({ runId: input.runId });
return result ? jsonToolResult(result) : jsonErrorToolResult(`Scan run "${input.runId}" was not found.`);
},
);
}
// scan_read_artifact is only exposed when the port implements `readArtifact`.
if (scan.readArtifact) {
registerParsedTool(
server,
'scan_read_artifact',
{
title: 'Scan Read Artifact',
description: 'Read one artifact that belongs to a standalone KLO scan run.',
inputSchema: scanArtifactReadSchema.shape,
},
scanArtifactReadSchema,
async (input) => {
const result = await scan.readArtifact?.({ runId: input.runId, path: input.path });
return result
? jsonToolResult(result)
: jsonErrorToolResult(`Scan artifact "${input.path}" was not found for run "${input.runId}".`);
},
);
}
}
}

View file

@ -0,0 +1,33 @@
export type { RegisterKloContextToolsDeps } from './context-tools.js';
export { jsonErrorToolResult, jsonToolResult, registerKloContextTools } from './context-tools.js';
export { createLocalProjectMcpContextPorts } from './local-project-ports.js';
export { createDefaultKloMcpServer, createKloMcpServer } from './server.js';
export type {
KloConnectionSummary,
KloConnectionsMcpPort,
KloIngestDiffSummary,
KloIngestMcpPort,
KloIngestStatusResponse,
KloIngestTriggerKind,
KloIngestTriggerResponse,
KloIngestWorkUnitSummary,
KloKnowledgeMcpPort,
KloKnowledgePage,
KloKnowledgeSearchResponse,
KloKnowledgeSearchResult,
KloKnowledgeWriteResponse,
KloMcpContextPorts,
KloMcpServerDeps,
KloMcpServerLike,
KloMcpTextContent,
KloMcpToolResult,
KloMcpUserContext,
KloSemanticLayerListResponse,
KloSemanticLayerMcpPort,
KloSemanticLayerQueryResponse,
KloSemanticLayerReadResponse,
KloSemanticLayerSourceSummary,
KloSemanticLayerValidationResponse,
KloSemanticLayerWriteResponse,
MemoryCapturePort,
} from './types.js';

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,683 @@
import YAML from 'yaml';
import {
type KloSqlQueryExecutorPort,
localConnectionInfoFromConfig,
localConnectionTypeForConfig,
} from '../connections/index.js';
import type { KloEmbeddingPort } from '../core/index.js';
import type { KloSemanticLayerComputePort } from '../daemon/index.js';
import {
createDefaultLocalIngestAdapters,
getLocalIngestStatus,
type IngestReportSnapshot,
ingestReportToMemoryFlowReplay,
type LocalIngestMcpOptions,
runLocalIngest,
runLocalMetabaseIngest,
} from '../ingest/index.js';
import { createLocalKloEmbeddingProviderFromConfig, KloIngestEmbeddingPortAdapter } from '../llm/index.js';
import type { KloLocalProject } from '../project/index.js';
import {
getLocalScanReport,
getLocalScanStatus,
type KloConnectionDriver,
type KloScanConnector,
type KloScanReport,
type LocalScanMcpOptions,
runLocalScan,
} from '../scan/index.js';
import {
compileLocalSlQuery,
type LocalSlSourceSearchResult,
type LocalSlSourceSummary,
listLocalSlSources,
searchLocalSlSources,
sourceDefinitionSchema,
sourceOverlaySchema,
} from '../sl/index.js';
import { readLocalKnowledgePage, searchLocalKnowledgePages, writeLocalKnowledgePage } from '../wiki/local-knowledge.js';
import type {
KloConnectionTestResponse,
KloIngestStatusResponse,
KloMcpContextPorts,
KloScanArtifactListResponse,
KloScanArtifactReadResponse,
KloScanArtifactSummary,
KloScanArtifactType,
} from './types.js';
// Author name passed to the file store when the local ports write or delete
// semantic-layer sources.
const LOCAL_AUTHOR = 'klo';
// Author email passed alongside LOCAL_AUTHOR on those file-store calls.
const LOCAL_AUTHOR_EMAIL = 'klo@example.com';
// Warning attached to validation results when only the YAML shape was checked
// (i.e. no semantic-layer compute port was supplied).
const SL_SHAPE_WARNING = 'Local stdio validation checks YAML shape only; Python semantic validation is not configured.';
/**
 * Optional collaborators for `createLocalProjectMcpContextPorts`.
 */
interface CreateLocalProjectMcpContextPortsOptions {
/** When set, used for semantic-layer validation and required by semantic-layer queries. */
semanticLayerCompute?: KloSemanticLayerComputePort;
/** When set, semantic-layer queries are executed (not only compiled); also passed to ingest runs. */
queryExecutor?: KloSqlQueryExecutorPort;
/** Enables the ingest port and supplies its adapters, runners and factories. */
localIngest?: LocalIngestMcpOptions;
/** Enables the scan port; its `createConnector` is also used for live connection tests. */
localScan?: LocalScanMcpOptions;
/** Overrides the embedding service otherwise derived from the project's ingest embedding config. */
embeddingService?: KloEmbeddingPort | null;
}
/**
 * Maps a connection driver name to the SQL dialect name used for
 * semantic-layer validation.
 *
 * Matching is case-insensitive. A missing or unrecognised driver falls back
 * to `'postgres'`.
 *
 * @param driver - Driver name from the connection config, if any.
 * @returns The dialect identifier for the driver.
 */
function dialectForDriver(driver: string | undefined): string {
  const normalized = (driver ?? 'postgres').toUpperCase();
  const map: Record<string, string> = {
    POSTGRESQL: 'postgres',
    POSTGRES: 'postgres',
    BIGQUERY: 'bigquery',
    SNOWFLAKE: 'snowflake',
    MYSQL: 'mysql',
    SQLSERVER: 'tsql',
    MSSQL: 'tsql',
    SQLITE: 'sqlite',
    // `normalizeScanDriver` in this file accepts the `sqlite3` alias; without
    // this entry such a connection would be validated with the postgres dialect.
    SQLITE3: 'sqlite',
    DUCKDB: 'duckdb',
    CLICKHOUSE: 'clickhouse',
    REDSHIFT: 'redshift',
    DATABRICKS: 'databricks',
  };
  return map[normalized] ?? 'postgres';
}
/**
 * Rejects values that could escape or confuse a relative file path.
 *
 * A value is rejected when it is blank, contains `..`, a backslash or `//`,
 * or starts with `/` or `.`.
 *
 * @param kind - Label used in the error message (e.g. "connection id").
 * @param value - Candidate path token.
 * @returns `value` unchanged when it is considered safe.
 * @throws Error with message `Unsafe <kind>: <value>` otherwise.
 */
function assertSafePathToken(kind: string, value: string): string {
  const isBlank = value.trim() === '';
  const hasForbiddenSequence = ['..', '\\', '//'].some((sequence) => value.includes(sequence));
  const hasForbiddenPrefix = ['/', '.'].some((prefix) => value.startsWith(prefix));
  if (isBlank || hasForbiddenSequence || hasForbiddenPrefix) {
    throw new Error(`Unsafe ${kind}: ${value}`);
  }
  return value;
}
/**
 * Validates a connection id before it is used as a config key or path segment.
 *
 * The id must start with an ASCII letter or digit and contain only letters,
 * digits, `_` and `-`; it is then also run through `assertSafePathToken`.
 *
 * @param connectionId - Candidate connection id.
 * @returns The id unchanged when valid.
 * @throws Error when the id does not match the allowed pattern.
 */
function assertSafeConnectionId(connectionId: string): string {
  const hasAllowedShape = /^[a-zA-Z0-9][a-zA-Z0-9_-]*$/.test(connectionId);
  if (hasAllowedShape) {
    return assertSafePathToken('connection id', connectionId);
  }
  throw new Error(`Unsafe connection id: ${connectionId}`);
}
/**
 * Validates a semantic-layer source name before it is used as a file name.
 *
 * The name must start with a lowercase letter or digit and contain only
 * lowercase letters, digits and `_`; it is then also run through
 * `assertSafePathToken`.
 *
 * @param sourceName - Candidate source name.
 * @returns The name unchanged when valid.
 * @throws Error when the name does not match the allowed pattern.
 */
function assertSafeSourceName(sourceName: string): string {
  const hasAllowedShape = /^[a-z0-9][a-z0-9_]*$/.test(sourceName);
  if (hasAllowedShape) {
    return assertSafePathToken('semantic-layer source name', sourceName);
  }
  throw new Error(`Unsafe semantic-layer source name: ${sourceName}`);
}
/**
 * Normalises a configured driver name to a scan driver identifier.
 *
 * Matching is case-insensitive. `sqlite3` is reported as `sqlite`; the other
 * recognised names are returned lower-cased as given (so `postgresql` stays
 * `postgresql`). Anything else, including a missing driver, becomes
 * `'postgres'`.
 *
 * @param driver - Driver name from the connection config, if any.
 * @returns The scan driver to use.
 */
function normalizeScanDriver(driver: string | undefined): KloConnectionDriver {
  const normalized = (driver ?? '').toLowerCase();
  switch (normalized) {
    case 'sqlite3':
      return 'sqlite';
    case 'postgres':
    case 'postgresql':
    case 'sqlite':
    case 'mysql':
    case 'clickhouse':
    case 'sqlserver':
    case 'bigquery':
    case 'snowflake':
    case 'posthog':
      return normalized;
    default:
      return 'postgres';
  }
}
/**
 * Runs the connector's `cleanup` hook when there is a connector and it
 * defines one; otherwise does nothing.
 *
 * @param connector - Connector to release, or `null` if none was created.
 */
async function cleanupConnector(connector: KloScanConnector | null): Promise<void> {
  if (!connector?.cleanup) {
    return;
  }
  await connector.cleanup();
}
/**
 * Tests a configured connection of the local project.
 *
 * - Returns `null` when the connection id is not present in the project config.
 * - Without a `localScan.createConnector` factory, reports the connection as
 *   configured (`ok: true`, `tableCount: null`) with a warning that no live
 *   test was possible.
 * - Otherwise creates a connector and runs a structural dry-run introspection;
 *   a failure is reported as `ok: false` with the error message rather than
 *   thrown. The connector is cleaned up in all cases.
 *
 * @param project - Local project whose config holds the connections.
 * @param options - Port options; only `localScan.createConnector` is read.
 * @param connectionId - Id of the connection to test.
 * @throws Error when `connectionId` is not a safe connection id.
 */
async function testLocalConnection(
  project: KloLocalProject,
  options: CreateLocalProjectMcpContextPortsOptions,
  connectionId: string,
): Promise<KloConnectionTestResponse | null> {
  const id = assertSafeConnectionId(connectionId);
  const connectionConfig = project.config.connections[id];
  if (!connectionConfig) {
    return null;
  }

  const identity = { id, connectionType: localConnectionTypeForConfig(id, connectionConfig) };
  const connectorFactory = options.localScan?.createConnector;
  if (!connectorFactory) {
    return {
      ...identity,
      ok: true,
      tableCount: null,
      message: 'Connection is configured; no native scan connector is available for live testing.',
      warnings: ['klo serve was not configured with a local scan connector factory.'],
    };
  }

  // Declared outside the try so the finally clause can release it even when
  // introspection fails.
  let activeConnector: KloScanConnector | null = null;
  try {
    activeConnector = await connectorFactory(id);
    const scanRequest = {
      connectionId: id,
      driver: normalizeScanDriver(connectionConfig.driver),
      mode: 'structural' as const,
      dryRun: true,
      detectRelationships: false,
    };
    const snapshot = await activeConnector.introspect(scanRequest, { runId: `connection-test-${id}` });
    return {
      ...identity,
      ok: true,
      tableCount: snapshot.tables.length,
      message: 'Connection test passed.',
      warnings: [],
    };
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    return {
      ...identity,
      ok: false,
      tableCount: null,
      message,
      warnings: [],
    };
  } finally {
    await cleanupConnector(activeConnector);
  }
}
/**
 * Classifies an artifact path against the paths recorded in a scan report.
 *
 * Checks, in order: the report path, the manifest shards, the enrichment
 * artifacts. Any path matching none of these is classified as `raw_source`.
 *
 * @param path - Artifact path to classify.
 * @param report - Scan report whose `artifactPaths` are consulted.
 */
function scanArtifactType(path: string, report: KloScanReport): KloScanArtifactType {
  const { artifactPaths } = report;
  if (artifactPaths.reportPath === path) {
    return 'report';
  }
  const isManifestShard = artifactPaths.manifestShards.includes(path);
  if (isManifestShard) {
    return 'manifest_shard';
  }
  return artifactPaths.enrichmentArtifacts.includes(path) ? 'enrichment_artifact' : 'raw_source';
}
/**
 * Best-effort lookup of an artifact's size via the project's file store.
 *
 * @param project - Project whose file store is read.
 * @param path - Artifact path.
 * @returns The numeric `size` reported by `readFile`, or `undefined` when the
 *   read fails or the result carries no numeric size.
 */
async function artifactSize(project: KloLocalProject, path: string): Promise<number | undefined> {
  let size: unknown;
  try {
    ({ size } = await project.fileStore.readFile(path));
  } catch {
    // Deliberately swallowed: a missing or unreadable artifact has no size.
    return undefined;
  }
  return typeof size === 'number' ? size : undefined;
}
/**
 * Lists every artifact associated with a scan report.
 *
 * Collects the files under the raw-sources directory (when set), the report
 * path (when set), the manifest shards and the enrichment artifacts,
 * de-duplicates them, and returns them sorted by path. A `size` is included
 * for each artifact only when one could be determined.
 *
 * @param project - Project whose file store is queried.
 * @param runId - Run id echoed back in the response.
 * @param report - Scan report describing the artifact locations.
 */
async function listArtifactsForReport(
  project: KloLocalProject,
  runId: string,
  report: KloScanReport,
): Promise<KloScanArtifactListResponse> {
  const { rawSourcesDir, reportPath, manifestShards, enrichmentArtifacts } = report.artifactPaths;
  const rawSourceFiles = rawSourcesDir ? (await project.fileStore.listFiles(rawSourcesDir)).files : [];
  const uniquePaths = new Set<string>([
    ...rawSourceFiles,
    ...(reportPath ? [reportPath] : []),
    ...manifestShards,
    ...enrichmentArtifacts,
  ]);

  const artifacts: KloScanArtifactSummary[] = [];
  for (const artifactPath of Array.from(uniquePaths).sort()) {
    // Sizes are looked up one at a time, in sorted path order.
    const size = await artifactSize(project, artifactPath);
    const type = scanArtifactType(artifactPath, report);
    artifacts.push(size === undefined ? { path: artifactPath, type } : { path: artifactPath, type, size });
  }
  return { runId, artifacts };
}
/**
 * Reads one artifact of a scan run.
 *
 * The requested path must appear in the run's artifact listing; this keeps
 * callers from reading arbitrary files through the file store.
 *
 * @param project - Project whose scan reports and file store are used.
 * @param runId - Scan run id.
 * @param path - Artifact path to read.
 * @returns The artifact content and metadata, or `null` when the run has no
 *   report or the path is not one of its artifacts.
 */
async function readScanArtifact(
  project: KloLocalProject,
  runId: string,
  path: string,
): Promise<KloScanArtifactReadResponse | null> {
  const report = await getLocalScanReport(project, runId);
  if (!report) {
    return null;
  }

  const { artifacts } = await listArtifactsForReport(project, runId, report);
  const match = artifacts.find(({ path: candidatePath }) => candidatePath === path);
  if (!match) {
    return null;
  }

  const file = await project.fileStore.readFile(path);
  const sizeField = typeof file.size === 'number' ? { size: file.size } : {};
  return { runId, path, type: match.type, ...sizeField, content: file.content };
}
/**
 * Builds the file-store path of a semantic-layer source:
 * `semantic-layer/<connectionId>/<sourceName>.yaml`.
 *
 * @throws Error when either the connection id or the source name is unsafe.
 */
function slPath(connectionId: string, sourceName: string): string {
  const safeConnectionId = assertSafeConnectionId(connectionId);
  const safeSourceName = assertSafeSourceName(sourceName);
  return ['semantic-layer', safeConnectionId, `${safeSourceName}.yaml`].join('/');
}
/**
 * Derives a source name from a file path: the last `/`-separated segment with
 * a trailing `.yaml` or `.yml` extension removed.
 *
 * @param path - Slash-separated file path.
 */
function sourceNameFromPath(path: string): string {
  const segments = path.split('/');
  const fileName = segments[segments.length - 1];
  if (fileName === undefined) {
    return path;
  }
  return fileName.replace(/\.ya?ml$/, '');
}
/**
 * Type guard for plain object-like values: any non-null object that is not
 * an array.
 */
function isRecord(value: unknown): value is Record<string, unknown> {
  if (value === null || typeof value !== 'object') {
    return false;
  }
  return !Array.isArray(value);
}
/**
 * Parses YAML text and requires the top-level value to be an object.
 *
 * @param raw - YAML document text.
 * @returns The parsed object.
 * @throws Error when the document is not an object (e.g. a scalar or a list);
 *   errors raised by `YAML.parse` itself propagate unchanged.
 */
function parseYamlRecord(raw: string): Record<string, unknown> {
  const parsedDocument: unknown = YAML.parse(raw);
  if (isRecord(parsedDocument)) {
    return parsedDocument;
  }
  throw new Error('Semantic-layer source YAML must contain an object');
}
/**
 * Lists semantic-layer YAML files (`.yaml` / `.yml`) in sorted order.
 *
 * @param project - Project whose file store is listed.
 * @param connectionId - When given, only that connection's directory is listed.
 * @throws Error when `connectionId` is given but is not a safe connection id.
 */
async function listSlPaths(project: KloLocalProject, connectionId?: string): Promise<string[]> {
  let root = 'semantic-layer';
  if (connectionId) {
    root = `semantic-layer/${assertSafeConnectionId(connectionId)}`;
  }
  const { files } = await project.fileStore.listFiles(root);
  const yamlFiles = files.filter((file) => ['.yaml', '.yml'].some((extension) => file.endsWith(extension)));
  return yamlFiles.sort();
}
/**
 * Loads the semantic-layer sources of a connection that define a `table` or
 * `sql` property; sources with neither are left out.
 *
 * @param project - Project whose file store is read.
 * @param connectionId - Connection whose sources are loaded.
 * @returns Parsed source objects, in sorted path order.
 */
async function loadComputableSources(
  project: KloLocalProject,
  connectionId: string,
): Promise<Record<string, unknown>[]> {
  const computable: Record<string, unknown>[] = [];
  for (const path of await listSlPaths(project, connectionId)) {
    const { content } = await project.fileStore.readFile(path);
    const source = parseYamlRecord(content);
    const isComputable = Boolean(source.table || source.sql);
    if (isComputable) {
      computable.push(source);
    }
  }
  return computable;
}
/**
 * Shape-checks a parsed semantic-layer source.
 *
 * The record is accepted when it parses as either a source definition or a
 * source overlay. The file-derived `sourceName` is used as `name` unless the
 * record already has a string `name`.
 *
 * @param sourceName - Name derived from the source's file path.
 * @param source - Parsed YAML record.
 * @returns An empty list when valid; otherwise one message per issue reported
 *   by the definition schema (overlay issues are not reported).
 */
function validateSourceRecord(sourceName: string, source: Record<string, unknown>): string[] {
  const name = typeof source.name === 'string' ? source.name : sourceName;
  const candidate = { ...source, name };

  const asDefinition = sourceDefinitionSchema.safeParse(candidate);
  if (asDefinition.success || sourceOverlaySchema.safeParse(candidate).success) {
    return [];
  }

  return asDefinition.error.issues.map((issue) => {
    const location = issue.path.join('.') || 'source';
    return `${sourceName}: ${location} ${issue.message}`;
  });
}
/**
 * Extracts the optional `sourceDir` from an ingest trigger config.
 *
 * @param config - Untyped config value supplied by the caller.
 * @returns The `sourceDir` string as given, or `undefined` when `config` is
 *   not an object or has no `sourceDir`.
 * @throws Error when `sourceDir` is present but is not a non-blank string.
 */
function localIngestSourceDir(config: unknown): string | undefined {
  if (!isRecord(config)) {
    return undefined;
  }
  const { sourceDir } = config;
  if (sourceDir === undefined) {
    return undefined;
  }
  if (typeof sourceDir === 'string' && sourceDir.trim().length > 0) {
    return sourceDir;
  }
  throw new Error('Local ingest config sourceDir must be a non-empty string when provided');
}
/**
 * Counts the distinct raw files referenced across all work units of an
 * ingest report.
 */
function rawFileCountFromIngestReport(report: IngestReportSnapshot): number {
  const allRawFiles = report.body.workUnits.flatMap(({ rawFiles }) => rawFiles);
  const distinctRawFiles = new Set(allRawFiles);
  return distinctRawFiles.size;
}
/**
 * Type guard distinguishing search results from plain source summaries by
 * the presence of a `score` property.
 */
function hasSlSearchMetadata(
  source: LocalSlSourceSummary | LocalSlSourceSearchResult,
): source is LocalSlSourceSearchResult {
  // Reflect.has is the function form of the `in` operator.
  return Reflect.has(source, 'score');
}
/**
 * Converts a stored ingest report into the status response shape.
 *
 * A stored report always maps to a finished run (`stage: 'done'`,
 * `progress: 1`, `done: true`); `status` is `'error'` when the report lists
 * failed work units and `'done'` otherwise. `startedAt` and `completedAt` are
 * both set to the report's `createdAt`. `sourceDir` and `previousRunId` are
 * reported as `null`, and each work unit's `peerFileIndex` and
 * `dependencyPaths` as empty lists.
 *
 * @param report - Stored ingest report snapshot.
 */
function statusFromIngestReport(report: IngestReportSnapshot): KloIngestStatusResponse {
  const { body } = report;
  const failures = body.failedWorkUnits;
  const hasFailures = failures.length > 0;
  const workUnits = body.workUnits.map(({ unitKey, rawFiles }) => ({
    unitKey,
    // Copied so the response does not share arrays with the report.
    rawFiles: [...rawFiles],
    peerFileIndex: [],
    dependencyPaths: [],
  }));

  return {
    runId: report.runId,
    jobId: report.jobId,
    reportId: report.id,
    status: hasFailures ? 'error' : 'done',
    stage: 'done',
    progress: 1,
    errors: failures,
    done: true,
    adapter: report.sourceKey,
    connectionId: report.connectionId,
    sourceDir: null,
    syncId: body.syncId,
    startedAt: report.createdAt,
    completedAt: report.createdAt,
    previousRunId: null,
    diffSummary: body.diffSummary,
    workUnitCount: body.workUnits.length,
    rawFileCount: rawFileCountFromIngestReport(report),
    workUnits,
    evictionDeletedRawPaths: [...body.evictionInputs],
  };
}
export function createLocalProjectMcpContextPorts(
project: KloLocalProject,
options: CreateLocalProjectMcpContextPortsOptions = {},
): KloMcpContextPorts {
const configuredEmbeddingProvider = createLocalKloEmbeddingProviderFromConfig(project.config.ingest.embeddings);
const embeddingService =
options.embeddingService ??
(configuredEmbeddingProvider ? new KloIngestEmbeddingPortAdapter(configuredEmbeddingProvider) : null);
const ports: KloMcpContextPorts = {
connections: {
async list() {
return Object.entries(project.config.connections)
.map(([id, config]) => localConnectionInfoFromConfig(id, config))
.filter(
(connection): connection is { id: string; name: string; connectionType: string } => connection !== null,
)
.sort((a, b) => a.id.localeCompare(b.id));
},
async test(input) {
return testLocalConnection(project, options, input.connectionId);
},
},
knowledge: {
async search(input) {
const results = await searchLocalKnowledgePages(project, {
query: input.query,
userId: input.userId,
limit: input.limit,
embeddingService,
});
return {
results: results.slice(0, input.limit).map((result) => ({
key: result.key,
path: result.path,
scope: result.scope,
summary: result.summary,
score: result.score,
matchReasons: result.matchReasons,
lanes: result.lanes,
})),
totalFound: results.length,
};
},
async read(input) {
const page = await readLocalKnowledgePage(project, {
key: input.key,
userId: input.userId,
});
return page
? {
key: page.key,
scope: page.scope,
summary: page.summary,
content: page.content,
tags: page.tags,
refs: page.refs,
slRefs: page.slRefs,
}
: null;
},
async write(input) {
const existing = await readLocalKnowledgePage(project, {
key: input.key,
userId: input.userId,
});
await writeLocalKnowledgePage(project, {
key: input.key,
scope: 'GLOBAL',
userId: input.userId,
summary: input.summary,
content: input.content,
tags: input.tags,
refs: input.refs,
slRefs: input.slRefs,
source: input.source,
intent: input.intent,
tables: input.tables,
representativeSql: input.representativeSql,
usage: input.usage,
fingerprints: input.fingerprints,
});
return { success: true, key: input.key, action: existing ? 'updated' : 'created' };
},
},
semanticLayer: {
async listSources(input) {
const listed: Array<LocalSlSourceSummary | LocalSlSourceSearchResult> = input.query
? await searchLocalSlSources(project, {
connectionId: input.connectionId,
query: input.query,
embeddingService,
})
: await listLocalSlSources(project, { connectionId: input.connectionId });
const sources = listed.map((source) => ({
connectionId: source.connectionId,
connectionName: source.connectionId,
name: source.name,
description: source.description,
columnCount: source.columnCount,
measureCount: source.measureCount,
joinCount: source.joinCount,
...(hasSlSearchMetadata(source) ? { score: source.score } : {}),
...(hasSlSearchMetadata(source) && source.matchReasons ? { matchReasons: source.matchReasons } : {}),
...(hasSlSearchMetadata(source) && source.dictionaryMatches
? { dictionaryMatches: source.dictionaryMatches }
: {}),
...(hasSlSearchMetadata(source) && source.lanes ? { lanes: source.lanes } : {}),
}));
return { sources, totalSources: sources.length };
},
async readSource(input) {
const path = slPath(input.connectionId, input.sourceName);
try {
const result = await project.fileStore.readFile(path);
return { sourceName: input.sourceName, yaml: result.content };
} catch {
return null;
}
},
async writeSource(input) {
const path = slPath(input.connectionId, input.sourceName);
if (input.delete) {
const deleted = await project.fileStore.deleteFile(
path,
LOCAL_AUTHOR,
LOCAL_AUTHOR_EMAIL,
`Remove semantic-layer source: ${input.sourceName}`,
);
return { success: Boolean(deleted), sourceName: input.sourceName };
}
const yaml =
input.yaml ?? YAML.stringify({ ...input.source, name: input.sourceName }, { indent: 2, lineWidth: 0 });
parseYamlRecord(yaml);
await project.fileStore.writeFile(
path,
`${yaml.trimEnd()}\n`,
LOCAL_AUTHOR,
LOCAL_AUTHOR_EMAIL,
`Update semantic-layer source: ${input.sourceName}`,
);
return { success: true, sourceName: input.sourceName, yaml: `${yaml.trimEnd()}\n` };
},
async validate(input) {
if (options.semanticLayerCompute) {
const connectionId = assertSafeConnectionId(input.connectionId);
const result = await options.semanticLayerCompute.validateSources({
sources: await loadComputableSources(project, connectionId),
dialect: dialectForDriver(project.config.connections[connectionId]?.driver),
recentlyTouched: input.names,
});
return {
success: result.valid,
errors: result.errors,
warnings: result.warnings,
};
}
const names = new Set(input.names ?? []);
const paths = await listSlPaths(project, input.connectionId);
const errors: string[] = [];
for (const path of paths) {
const sourceName = sourceNameFromPath(path);
if (names.size > 0 && !names.has(sourceName)) {
continue;
}
try {
const raw = await project.fileStore.readFile(path);
errors.push(...validateSourceRecord(sourceName, parseYamlRecord(raw.content)));
} catch (error) {
errors.push(`${sourceName}: ${error instanceof Error ? error.message : String(error)}`);
}
}
return {
success: errors.length === 0,
errors,
warnings: [SL_SHAPE_WARNING],
};
},
async query(input) {
if (!options.semanticLayerCompute) {
throw new Error(
'sl_query requires a semantic-layer query adapter. Local stdio MCP exposes file-backed SL CRUD only.',
);
}
return compileLocalSlQuery(project, {
connectionId: input.connectionId,
query: input.query,
compute: options.semanticLayerCompute,
execute: Boolean(options.queryExecutor),
maxRows: input.query.limit,
queryExecutor: options.queryExecutor,
});
},
},
};
if (options.localIngest) {
ports.ingest = {
async trigger(input) {
const sourceDir = localIngestSourceDir(input.config);
if (input.adapter === 'metabase' && !sourceDir) {
const result = await (options.localIngest?.runLocalMetabaseIngest ?? runLocalMetabaseIngest)({
project,
adapters: options.localIngest?.adapters ?? createDefaultLocalIngestAdapters(project),
metabaseConnectionId: input.connectionId,
trigger: input.trigger,
jobIdFactory: options.localIngest?.jobIdFactory,
agentRunner: options.localIngest?.agentRunner,
llmProvider: options.localIngest?.llmProvider,
memoryModel: options.localIngest?.memoryModel,
semanticLayerCompute: options.localIngest?.semanticLayerCompute ?? options.semanticLayerCompute,
queryExecutor: options.localIngest?.queryExecutor ?? options.queryExecutor,
logger: options.localIngest?.logger,
});
return {
runId: `metabase-fanout:${result.metabaseConnectionId}`,
jobId: undefined,
reportId: undefined,
fanout: {
status: result.status,
children: result.children.map((child) => ({
runId: child.report.runId,
jobId: child.report.jobId,
reportId: child.report.id,
targetConnectionId: child.targetConnectionId,
metabaseDatabaseId: child.metabaseDatabaseId,
})),
},
};
}
const result = await runLocalIngest({
project,
adapters: options.localIngest?.adapters ?? createDefaultLocalIngestAdapters(project),
adapter: input.adapter,
connectionId: input.connectionId,
sourceDir,
trigger: input.trigger,
jobId: options.localIngest?.jobIdFactory?.(),
agentRunner: options.localIngest?.agentRunner,
llmProvider: options.localIngest?.llmProvider,
memoryModel: options.localIngest?.memoryModel,
semanticLayerCompute: options.localIngest?.semanticLayerCompute ?? options.semanticLayerCompute,
queryExecutor: options.localIngest?.queryExecutor ?? options.queryExecutor,
logger: options.localIngest?.logger,
});
return {
runId: result.report.runId,
jobId: result.report.jobId,
reportId: result.report.id,
};
},
async status(input) {
const report = await getLocalIngestStatus(project, input.runId);
return report ? statusFromIngestReport(report) : null;
},
async report(input) {
return getLocalIngestStatus(project, input.runId);
},
async replay(input) {
const report = await getLocalIngestStatus(project, input.runId);
return report ? ingestReportToMemoryFlowReplay(report) : null;
},
};
}
if (options.localScan) {
ports.scan = {
async trigger(input) {
return runLocalScan({
project,
connectionId: input.connectionId,
mode: input.mode,
detectRelationships: input.detectRelationships,
dryRun: input.dryRun,
trigger: 'mcp',
adapters: options.localScan?.adapters,
databaseIntrospectionUrl: options.localScan?.databaseIntrospectionUrl,
createConnector: options.localScan?.createConnector,
jobId: options.localScan?.jobIdFactory?.(),
now: options.localScan?.now,
});
},
async status(input) {
return getLocalScanStatus(project, input.runId);
},
async report(input) {
return getLocalScanReport(project, input.runId);
},
async listArtifacts(input) {
const report = await getLocalScanReport(project, input.runId);
return report ? listArtifactsForReport(project, input.runId, report) : null;
},
async readArtifact(input) {
return readScanArtifact(project, input.runId, input.path);
},
};
}
return ports;
}

View file

@ -0,0 +1,869 @@
import { access, mkdtemp, readFile, rm } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { describe, expect, it, vi } from 'vitest';
import { createLocalProjectMemoryCapture } from '../memory/index.js';
import { initKloProject } from '../project/index.js';
import { createKloMcpServer } from './server.js';
import type {
KloIngestMcpPort,
KloKnowledgeMcpPort,
KloMcpContextPorts,
KloScanMcpPort,
KloSemanticLayerMcpPort,
MemoryCapturePort,
} from './types.js';
/** One tool registration recorded by the fake server: the three arguments passed to `registerTool`. */
type RegisteredTool = {
name: string;
config: { title?: string; description?: string; inputSchema: unknown };
handler: (input: Record<string, unknown>) => Promise<unknown>;
};
/**
 * Builds an in-memory stand-in for an MCP server.
 *
 * @returns `tools`, the list of registrations in call order, and `server`, whose `registerTool`
 * appends each registration to that list.
 */
function makeFakeServer() {
  const tools: RegisteredTool[] = [];
  const server = {
    registerTool(name: string, config: RegisteredTool['config'], handler: RegisteredTool['handler']): void {
      const registration: RegisteredTool = { name, config, handler };
      tools.push(registration);
    },
  };
  return { tools, server };
}
/**
 * Looks up a registered tool by name.
 *
 * @param tools - Registrations to search, in registration order.
 * @param name - Tool name to look for.
 * @returns The first registration whose `name` matches.
 * @throws Error when no registration has that name.
 */
function getTool(tools: RegisteredTool[], name: string): RegisteredTool {
  for (const candidate of tools) {
    if (candidate.name === name) {
      return candidate;
    }
  }
  throw new Error(`Tool not registered: ${name}`);
}
describe('createKloMcpServer', () => {
// With only a `connections.list` port and no memory capture port, exactly one tool is registered.
it('registers context tools without memory capture tools when memory capture is omitted', async () => {
  const warehouse = { id: 'warehouse', name: 'warehouse', connectionType: 'postgres' };
  const { server, tools } = makeFakeServer();

  createKloMcpServer({
    server,
    userContext: { userId: 'local-user' },
    contextTools: {
      connections: {
        list: async () => [{ ...warehouse }],
      },
    },
  });

  const registeredNames = tools.map(({ name }) => name);
  expect(registeredNames).toEqual(['connection_list']);

  const listResult = getTool(tools, 'connection_list').handler({});
  await expect(listResult).resolves.toMatchObject({
    structuredContent: {
      connections: [warehouse],
    },
  });
});
// A memory capture port alone yields the two memory tools; both handlers forward to the port and
// wrap its result as pretty-printed JSON text plus structured content.
it('registers memory capture tools without host app dependencies', async () => {
  type CaptureStatus = NonNullable<Awaited<ReturnType<MemoryCapturePort['status']>>>;
  // Returns a fresh object on every call so the mocked value and the expectations never alias.
  const buildDoneStatus = (): CaptureStatus => ({
    runId: 'run-1',
    status: 'done',
    stage: 'done',
    done: true,
    captured: { wiki: ['revenue'], sl: [], xrefs: [] },
    error: null,
    commitHash: 'abc123',
    skillsLoaded: ['knowledge_capture'],
    signalDetected: true,
  });
  const connectionId = '00000000-0000-4000-8000-000000000001';

  const { server, tools } = makeFakeServer();
  const capture: MemoryCapturePort = {
    capture: vi.fn<MemoryCapturePort['capture']>().mockResolvedValue({ runId: 'run-1' }),
    status: vi.fn<MemoryCapturePort['status']>().mockResolvedValue(buildDoneStatus()),
  };
  createKloMcpServer({
    server,
    memoryCapture: capture,
    userContext: { userId: 'mcp-user' },
  });

  const sortedNames = tools.map(({ name }) => name).sort();
  expect(sortedNames).toEqual(['memory_capture', 'memory_capture_status']);

  const captureResult = await getTool(tools, 'memory_capture').handler({
    userMessage: 'Revenue means paid order value.',
    assistantMessage: 'Captured.',
    connectionId,
  });
  expect(captureResult).toEqual({
    content: [{ type: 'text', text: JSON.stringify({ runId: 'run-1' }, null, 2) }],
    structuredContent: { runId: 'run-1' },
  });
  expect(capture.capture).toHaveBeenCalledWith({
    userId: 'mcp-user',
    chatId: expect.stringMatching(/^mcp-/),
    userMessage: 'Revenue means paid order value.',
    assistantMessage: 'Captured.',
    connectionId,
    sourceType: 'external_ingest',
  });

  const statusResult = await getTool(tools, 'memory_capture_status').handler({ runId: 'run-1' });
  expect(statusResult).toEqual({
    content: [{ type: 'text', text: JSON.stringify(buildDoneStatus(), null, 2) }],
    structuredContent: buildDoneStatus(),
  });
});
// When the port reports no run (`null`), the status tool answers with an `isError` text payload.
it('returns an MCP error payload for missing run ids', async () => {
  const { server, tools } = makeFakeServer();
  const memoryCapture: MemoryCapturePort = {
    capture: vi.fn<MemoryCapturePort['capture']>(),
    status: vi.fn<MemoryCapturePort['status']>().mockResolvedValue(null),
  };
  createKloMcpServer({ server, memoryCapture, userContext: { userId: 'mcp-user' } });

  const statusResult = await getTool(tools, 'memory_capture_status').handler({ runId: 'missing' });

  expect(statusResult).toEqual({
    content: [{ type: 'text', text: 'Memory capture run "missing" was not found.' }],
    isError: true,
  });
});
// Integration-style check: the MCP memory tools are wired to a local-project memory capture backed
// by a temporary project directory, with a stub agent runner standing in for the real agent.
it('runs MCP memory_capture against a local project memory port', async () => {
const tempDir = await mkdtemp(join(tmpdir(), 'klo-mcp-local-memory-'));
try {
const project = await initKloProject({ projectDir: tempDir, projectName: 'warehouse' });
// Stub runner: loads the knowledge_capture skill, writes a single wiki page keyed "arr", then stops.
const agentRunner = {
runLoop: async ({
toolSet,
}: {
toolSet: Record<string, { execute: (input: unknown, options?: { toolCallId?: string }) => Promise<unknown> }>;
}) => {
await toolSet.load_skill.execute({ name: 'knowledge_capture' });
await toolSet.wiki_write.execute(
{
key: 'arr',
summary: 'ARR definition',
content: 'ARR means annual recurring revenue.',
},
{ toolCallId: 'wiki-write' },
);
return { stopReason: 'natural' as const };
},
};
// The fixed run id makes the run addressable in the assertions below.
const memoryCapture = createLocalProjectMemoryCapture(project, {
agentRunner: agentRunner as never,
runIdFactory: () => 'memory-run-mcp',
});
const fake = makeFakeServer();
createKloMcpServer({
server: fake.server,
memoryCapture,
userContext: { userId: 'mcp-user' },
});
const capture = await getTool(fake.tools, 'memory_capture').handler({
userMessage: 'define ARR as annual recurring revenue',
assistantMessage: 'Captured.',
});
expect(capture).toMatchObject({
structuredContent: { runId: 'memory-run-mcp' },
});
// The tool call only returns a run id; wait for that run before reading its status.
await memoryCapture.waitForRun('memory-run-mcp');
await expect(
getTool(fake.tools, 'memory_capture_status').handler({ runId: 'memory-run-mcp' }),
).resolves.toMatchObject({
structuredContent: {
runId: 'memory-run-mcp',
status: 'done',
done: true,
captured: { wiki: ['arr'], sl: [], xrefs: [] },
},
});
// On-disk expectations: .klo/db.sqlite exists, no per-run JSON file exists under .klo/memory-runs,
// and the wiki page was written to knowledge/global/arr.md.
await expect(access(join(project.projectDir, '.klo/db.sqlite'))).resolves.toBeUndefined();
await expect(access(join(project.projectDir, '.klo/memory-runs/memory-run-mcp.json'))).rejects.toThrow();
await expect(readFile(join(project.projectDir, 'knowledge/global/arr.md'), 'utf-8')).resolves.toContain(
'ARR means annual recurring revenue.',
);
} finally {
// Always remove the temporary project, even when an assertion fails.
await rm(tempDir, { recursive: true, force: true });
}
});
// Supplies every context port as a mock, then checks (1) the full set of registered tool names and
// (2) for each tool, the arguments forwarded to the port and/or the payload returned to the caller.
it('registers KLO context MCP tools when context ports are supplied', async () => {
const fake = makeFakeServer();
const capture: MemoryCapturePort = {
capture: vi.fn<MemoryCapturePort['capture']>().mockResolvedValue({ runId: 'run-1' }),
status: vi.fn<MemoryCapturePort['status']>().mockResolvedValue(null),
};
// Mock ports: each method resolves with a canned response.
const contextTools: KloMcpContextPorts = {
connections: {
list: vi.fn().mockResolvedValue([
{
id: '00000000-0000-4000-8000-000000000001',
name: 'Warehouse',
connectionType: 'POSTGRES',
},
]),
test: vi.fn().mockResolvedValue({
id: 'warehouse',
connectionType: 'postgres',
ok: true,
tableCount: 2,
message: 'Connection test passed.',
warnings: [],
}),
},
knowledge: {
search: vi.fn<KloKnowledgeMcpPort['search']>().mockResolvedValue({
results: [
{
key: 'revenue',
path: 'knowledge/global/revenue.md',
scope: 'GLOBAL',
summary: 'Paid order value',
score: 0.42,
matchReasons: ['lexical'],
},
],
totalFound: 1,
}),
read: vi.fn<KloKnowledgeMcpPort['read']>().mockResolvedValue({
key: 'revenue',
summary: 'Paid order value',
content: '# Revenue',
scope: 'GLOBAL',
tags: ['finance'],
refs: [],
slRefs: ['orders'],
}),
write: vi.fn<KloKnowledgeMcpPort['write']>().mockResolvedValue({
success: true,
key: 'revenue',
action: 'updated',
}),
},
semanticLayer: {
listSources: vi.fn<KloSemanticLayerMcpPort['listSources']>().mockResolvedValue({
sources: [
{
connectionId: '00000000-0000-4000-8000-000000000001',
connectionName: 'Warehouse',
name: 'orders',
description: 'Order facts',
columnCount: 2,
measureCount: 1,
joinCount: 0,
},
],
totalSources: 1,
}),
readSource: vi.fn<KloSemanticLayerMcpPort['readSource']>().mockResolvedValue({
sourceName: 'orders',
yaml: 'name: orders\n',
}),
writeSource: vi.fn<KloSemanticLayerMcpPort['writeSource']>().mockResolvedValue({
success: true,
sourceName: 'orders',
yaml: 'name: orders\n',
commitHash: 'abc123',
}),
validate: vi.fn<KloSemanticLayerMcpPort['validate']>().mockResolvedValue({
success: true,
errors: [],
warnings: [],
}),
query: vi.fn<KloSemanticLayerMcpPort['query']>().mockResolvedValue({
sql: 'select 1',
headers: ['count'],
rows: [[1]],
totalRows: 1,
plan: { sources: ['orders'] },
}),
},
ingest: {
trigger: vi.fn<KloIngestMcpPort['trigger']>().mockResolvedValue({
runId: 'run-42',
jobId: 'job-42',
reportId: 'report-42',
}),
status: vi.fn<KloIngestMcpPort['status']>().mockResolvedValue({
runId: 'run-42',
jobId: 'job-42',
reportId: 'report-42',
status: 'done',
stage: 'done',
progress: 1,
done: true,
adapter: 'fake',
connectionId: 'warehouse',
sourceDir: '/tmp/upload',
syncId: '2026-04-27-120000-run-42',
startedAt: '2026-04-27T12:00:00.000Z',
completedAt: '2026-04-27T12:00:01.000Z',
previousRunId: 'run-41',
diffSummary: {
added: 0,
modified: 1,
deleted: 0,
unchanged: 3,
},
rawFileCount: 4,
workUnitCount: 1,
workUnits: [
{
unitKey: 'fake-orders',
rawFiles: ['orders/orders.json'],
peerFileIndex: [],
dependencyPaths: [],
},
],
evictionDeletedRawPaths: [],
errors: [],
}),
report: vi.fn<NonNullable<KloIngestMcpPort['report']>>().mockResolvedValue({
id: 'report-42',
runId: 'run-42',
jobId: 'job-42',
connectionId: 'warehouse',
sourceKey: 'fake',
createdAt: '2026-04-27T12:00:01.000Z',
body: {
syncId: '2026-04-27-120000-run-42',
diffSummary: { added: 0, modified: 1, deleted: 0, unchanged: 3 },
commitSha: null,
workUnits: [],
failedWorkUnits: [],
reconciliationSkipped: false,
conflictsResolved: [],
evictionsApplied: [],
unmappedFallbacks: [],
evictionInputs: [],
unresolvedCards: [],
supersededBy: null,
overrideOf: null,
provenanceRows: [],
toolTranscripts: [],
},
}),
replay: vi.fn<NonNullable<KloIngestMcpPort['replay']>>().mockResolvedValue({
runId: 'run-42',
reportId: 'report-42',
reportPath: 'report-42',
connectionId: 'warehouse',
adapter: 'fake',
status: 'done',
sourceDir: null,
syncId: '2026-04-27-120000-run-42',
errors: [],
events: [{ type: 'report_created', runId: 'run-42', reportPath: 'report-42' }],
plannedWorkUnits: [],
details: { actions: [], provenance: [], transcripts: [] },
}),
},
scan: {
trigger: vi.fn<KloScanMcpPort['trigger']>().mockResolvedValue({
runId: 'scan-run-1',
status: 'done',
done: true,
connectionId: 'warehouse',
mode: 'structural',
dryRun: false,
syncId: 'sync-1',
report: {
connectionId: 'warehouse',
driver: 'postgres',
syncId: 'sync-1',
runId: 'scan-run-1',
trigger: 'mcp',
mode: 'structural',
dryRun: false,
artifactPaths: {
rawSourcesDir: 'raw-sources/warehouse/live-database/sync-1',
reportPath: 'raw-sources/warehouse/live-database/sync-1/scan-report.json',
manifestShards: [],
enrichmentArtifacts: [],
},
diffSummary: {
tablesAdded: 1,
tablesModified: 0,
tablesDeleted: 0,
tablesUnchanged: 0,
columnsAdded: 0,
columnsModified: 0,
columnsDeleted: 0,
},
manifestShardsWritten: 0,
structuralSyncStats: {
tablesCreated: 0,
tablesUpdated: 0,
tablesDeleted: 0,
columnsCreated: 0,
columnsUpdated: 0,
columnsDeleted: 0,
},
enrichment: {
dataDictionary: 'skipped',
tableDescriptions: 'skipped',
columnDescriptions: 'skipped',
embeddings: 'skipped',
deterministicRelationships: 'skipped',
llmRelationshipValidation: 'skipped',
statisticalValidation: 'skipped',
},
capabilityGaps: [],
warnings: [],
relationships: { accepted: 0, review: 0, rejected: 0, skipped: 0 },
enrichmentState: {
resumedStages: [],
completedStages: [],
failedStages: [],
},
createdAt: '2026-04-29T09:00:00.000Z',
},
}),
status: vi.fn<KloScanMcpPort['status']>().mockResolvedValue({
runId: 'scan-run-1',
status: 'done',
done: true,
connectionId: 'warehouse',
mode: 'structural',
dryRun: false,
syncId: 'sync-1',
progress: 1,
startedAt: '2026-04-29T09:00:00.000Z',
completedAt: '2026-04-29T09:00:01.000Z',
reportPath: 'raw-sources/warehouse/live-database/sync-1/scan-report.json',
warnings: [],
}),
// The report mock resolves null so the "not found" error payload can be asserted below.
report: vi.fn<KloScanMcpPort['report']>().mockResolvedValue(null),
listArtifacts: vi.fn<NonNullable<KloScanMcpPort['listArtifacts']>>().mockResolvedValue({
runId: 'scan-run-1',
artifacts: [
{
path: 'raw-sources/warehouse/live-database/sync-1/scan-report.json',
type: 'report',
size: 128,
},
{
path: 'raw-sources/warehouse/live-database/sync-1/tables/orders.json',
type: 'raw_source',
size: 64,
},
],
}),
// Only the orders.json path resolves to an artifact; any other path yields null.
readArtifact: vi.fn<NonNullable<KloScanMcpPort['readArtifact']>>().mockImplementation(async (input) => {
if (input.path !== 'raw-sources/warehouse/live-database/sync-1/tables/orders.json') {
return null;
}
return {
runId: input.runId,
path: input.path,
type: 'raw_source',
size: 64,
content: '{"name":"orders"}\n',
};
}),
},
};
createKloMcpServer({
server: fake.server,
memoryCapture: capture,
userContext: { userId: 'mcp-user' },
contextTools,
});
// Names are sorted before comparison, so registration order is not asserted here.
expect(fake.tools.map((tool) => tool.name).sort()).toEqual([
'connection_list',
'connection_test',
'ingest_replay',
'ingest_report',
'ingest_status',
'ingest_trigger',
'knowledge_read',
'knowledge_search',
'knowledge_write',
'memory_capture',
'memory_capture_status',
'scan_list_artifacts',
'scan_read_artifact',
'scan_report',
'scan_status',
'scan_trigger',
'sl_list_sources',
'sl_query',
'sl_read_source',
'sl_validate',
'sl_write_source',
]);
// --- connection tools: the list result is wrapped under a `connections` key ---
await expect(getTool(fake.tools, 'connection_list').handler({})).resolves.toEqual({
content: [
{
type: 'text',
text: JSON.stringify(
{
connections: [
{
id: '00000000-0000-4000-8000-000000000001',
name: 'Warehouse',
connectionType: 'POSTGRES',
},
],
},
null,
2,
),
},
],
structuredContent: {
connections: [
{
id: '00000000-0000-4000-8000-000000000001',
name: 'Warehouse',
connectionType: 'POSTGRES',
},
],
},
});
await expect(getTool(fake.tools, 'connection_test').handler({ connectionId: 'warehouse' })).resolves.toEqual({
content: [
{
type: 'text',
text: JSON.stringify(
{
id: 'warehouse',
connectionType: 'postgres',
ok: true,
tableCount: 2,
message: 'Connection test passed.',
warnings: [],
},
null,
2,
),
},
],
structuredContent: {
id: 'warehouse',
connectionType: 'postgres',
ok: true,
tableCount: 2,
message: 'Connection test passed.',
warnings: [],
},
});
expect(contextTools.connections?.test).toHaveBeenCalledWith({ connectionId: 'warehouse' });
// --- knowledge tools: the configured userId is added to every port call ---
await getTool(fake.tools, 'knowledge_search').handler({ query: 'revenue', limit: 5 });
expect(contextTools.knowledge?.search).toHaveBeenCalledWith({
userId: 'mcp-user',
query: 'revenue',
limit: 5,
});
await getTool(fake.tools, 'knowledge_read').handler({ key: 'revenue' });
expect(contextTools.knowledge?.read).toHaveBeenCalledWith({
userId: 'mcp-user',
key: 'revenue',
});
// The tool input uses snake_case `sl_refs`; the port receives camelCase `slRefs`.
await getTool(fake.tools, 'knowledge_write').handler({
key: 'revenue',
summary: 'Paid order value',
content: '# Revenue',
tags: ['finance'],
refs: ['gross-margin'],
sl_refs: ['orders'],
});
expect(contextTools.knowledge?.write).toHaveBeenCalledWith({
userId: 'mcp-user',
key: 'revenue',
summary: 'Paid order value',
content: '# Revenue',
tags: ['finance'],
refs: ['gross-margin'],
slRefs: ['orders'],
});
// --- semantic-layer tools ---
await getTool(fake.tools, 'sl_list_sources').handler({
connectionId: '00000000-0000-4000-8000-000000000001',
query: 'orders',
});
expect(contextTools.semanticLayer?.listSources).toHaveBeenCalledWith({
connectionId: '00000000-0000-4000-8000-000000000001',
query: 'orders',
});
await getTool(fake.tools, 'sl_read_source').handler({
connectionId: 'warehouse',
sourceName: 'orders',
});
expect(contextTools.semanticLayer?.readSource).toHaveBeenCalledWith({
connectionId: 'warehouse',
sourceName: 'orders',
});
await getTool(fake.tools, 'sl_write_source').handler({
connectionId: '00000000-0000-4000-8000-000000000001',
sourceName: 'orders',
source: { name: 'orders', table: 'public.orders', grain: ['id'], columns: [], joins: [], measures: [] },
});
expect(contextTools.semanticLayer?.writeSource).toHaveBeenCalledWith({
connectionId: '00000000-0000-4000-8000-000000000001',
sourceName: 'orders',
source: { name: 'orders', table: 'public.orders', grain: ['id'], columns: [], joins: [], measures: [] },
yaml: undefined,
delete: undefined,
});
await getTool(fake.tools, 'sl_validate').handler({
connectionId: '00000000-0000-4000-8000-000000000001',
names: ['orders'],
});
expect(contextTools.semanticLayer?.validate).toHaveBeenCalledWith({
connectionId: '00000000-0000-4000-8000-000000000001',
names: ['orders'],
});
// The flat tool input is regrouped under `query`; segments, order_by and include_empty were not
// supplied by the caller and show up in the port call with the values asserted below.
await getTool(fake.tools, 'sl_query').handler({
connectionId: '00000000-0000-4000-8000-000000000001',
measures: ['orders.count'],
dimensions: ['orders.created_at'],
filters: ['orders.status = paid'],
limit: 25,
});
expect(contextTools.semanticLayer?.query).toHaveBeenCalledWith({
connectionId: '00000000-0000-4000-8000-000000000001',
query: {
measures: ['orders.count'],
dimensions: ['orders.created_at'],
filters: ['orders.status = paid'],
segments: [],
order_by: [],
limit: 25,
include_empty: true,
},
});
// --- ingest tools ---
await getTool(fake.tools, 'ingest_trigger').handler({
adapter: 'lookml',
connectionId: '00000000-0000-4000-8000-000000000001',
trigger: 'scheduled_pull',
config: { repoUrl: 'https://github.com/acme/looker.git' },
});
expect(contextTools.ingest?.trigger).toHaveBeenCalledWith({
adapter: 'lookml',
connectionId: '00000000-0000-4000-8000-000000000001',
trigger: 'scheduled_pull',
config: { repoUrl: 'https://github.com/acme/looker.git' },
});
expect(getTool(fake.tools, 'ingest_status').config.description).toBe(
'Read the current or final status for an ingest run, including local diff and work-unit summaries when available.',
);
await expect(getTool(fake.tools, 'ingest_status').handler({ runId: 'run-42' })).resolves.toMatchObject({
structuredContent: {
runId: 'run-42',
status: 'done',
stage: 'done',
progress: 1,
done: true,
adapter: 'fake',
connectionId: 'warehouse',
sourceDir: '/tmp/upload',
syncId: '2026-04-27-120000-run-42',
previousRunId: 'run-41',
diffSummary: {
added: 0,
modified: 1,
deleted: 0,
unchanged: 3,
},
rawFileCount: 4,
workUnitCount: 1,
workUnits: [
{
unitKey: 'fake-orders',
rawFiles: ['orders/orders.json'],
peerFileIndex: [],
dependencyPaths: [],
},
],
evictionDeletedRawPaths: [],
errors: [],
},
});
expect(contextTools.ingest?.status).toHaveBeenCalledWith({ runId: 'run-42' });
// The value passed as `runId` here is the report id; the handler forwards it to the port unchanged.
await expect(getTool(fake.tools, 'ingest_report').handler({ runId: 'report-42' })).resolves.toMatchObject({
structuredContent: {
id: 'report-42',
runId: 'run-42',
jobId: 'job-42',
sourceKey: 'fake',
},
});
expect(contextTools.ingest?.report).toHaveBeenCalledWith({ runId: 'report-42' });
await expect(getTool(fake.tools, 'ingest_replay').handler({ runId: 'run-42' })).resolves.toMatchObject({
structuredContent: {
runId: 'run-42',
reportId: 'report-42',
status: 'done',
adapter: 'fake',
},
});
expect(contextTools.ingest?.replay).toHaveBeenCalledWith({ runId: 'run-42' });
// --- scan tools: when `detectRelationships` is omitted the port receives `false` ---
await getTool(fake.tools, 'scan_trigger').handler({
connectionId: 'warehouse',
mode: 'structural',
dryRun: true,
});
expect(contextTools.scan?.trigger).toHaveBeenCalledWith({
connectionId: 'warehouse',
mode: 'structural',
detectRelationships: false,
dryRun: true,
});
await getTool(fake.tools, 'scan_trigger').handler({
connectionId: 'warehouse',
mode: 'relationships',
detectRelationships: true,
dryRun: false,
});
expect(contextTools.scan?.trigger).toHaveBeenCalledWith({
connectionId: 'warehouse',
mode: 'relationships',
detectRelationships: true,
dryRun: false,
});
await expect(getTool(fake.tools, 'scan_status').handler({ runId: 'scan-run-1' })).resolves.toMatchObject({
structuredContent: {
runId: 'scan-run-1',
status: 'done',
connectionId: 'warehouse',
},
});
// A null report from the port becomes an isError payload rather than a rejection.
await expect(getTool(fake.tools, 'scan_report').handler({ runId: 'missing' })).resolves.toEqual({
content: [{ type: 'text', text: 'Scan report "missing" was not found.' }],
isError: true,
});
await expect(getTool(fake.tools, 'scan_list_artifacts').handler({ runId: 'scan-run-1' })).resolves.toEqual({
content: [
{
type: 'text',
text: JSON.stringify(
{
runId: 'scan-run-1',
artifacts: [
{
path: 'raw-sources/warehouse/live-database/sync-1/scan-report.json',
type: 'report',
size: 128,
},
{
path: 'raw-sources/warehouse/live-database/sync-1/tables/orders.json',
type: 'raw_source',
size: 64,
},
],
},
null,
2,
),
},
],
structuredContent: {
runId: 'scan-run-1',
artifacts: [
{
path: 'raw-sources/warehouse/live-database/sync-1/scan-report.json',
type: 'report',
size: 128,
},
{
path: 'raw-sources/warehouse/live-database/sync-1/tables/orders.json',
type: 'raw_source',
size: 64,
},
],
},
});
expect(contextTools.scan?.listArtifacts).toHaveBeenCalledWith({ runId: 'scan-run-1' });
await expect(
getTool(fake.tools, 'scan_read_artifact').handler({
runId: 'scan-run-1',
path: 'raw-sources/warehouse/live-database/sync-1/tables/orders.json',
}),
).resolves.toMatchObject({
structuredContent: {
runId: 'scan-run-1',
path: 'raw-sources/warehouse/live-database/sync-1/tables/orders.json',
type: 'raw_source',
content: '{"name":"orders"}\n',
},
});
expect(contextTools.scan?.readArtifact).toHaveBeenCalledWith({
runId: 'scan-run-1',
path: 'raw-sources/warehouse/live-database/sync-1/tables/orders.json',
});
// A path the mock does not recognise resolves null, which the tool reports as "not found".
await expect(
getTool(fake.tools, 'scan_read_artifact').handler({
runId: 'scan-run-1',
path: 'klo.yaml',
}),
).resolves.toEqual({
content: [{ type: 'text', text: 'Scan artifact "klo.yaml" was not found for run "scan-run-1".' }],
isError: true,
});
});
});

View file

@ -0,0 +1,94 @@
import { randomUUID } from 'node:crypto';
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
import { z } from 'zod';
import type { MemoryAgentInput } from '../memory/index.js';
import { jsonErrorToolResult, jsonToolResult, registerKloContextTools } from './context-tools.js';
import type { KloMcpServerDeps, KloMcpServerLike, MemoryCapturePort } from './types.js';
// Input shape for the `memory_capture` tool: a plain object of zod validators (not a `z.object`),
// passed as-is to `registerTool` as `inputSchema`. Only `userMessage` is required.
const memoryCaptureInputSchema = {
userMessage: z.string().min(1).describe('The user message that may contain durable knowledge.'),
assistantMessage: z.string().optional().describe('The assistant response that concluded the exchange.'),
connectionId: z.string().min(1).optional().describe('Optional connection id for semantic-layer capture.'),
};
// Input shape for the `memory_capture_status` tool: a single required, non-empty `runId`.
const memoryCaptureStatusInputSchema = {
runId: z.string().min(1).describe('The memory capture run id returned by memory_capture.'),
};
/**
 * Registers the `memory_capture` and `memory_capture_status` tools on `deps.server`.
 *
 * Handlers are typed as receiving `Record<string, unknown>`, and `KloMcpServerLike` does not promise
 * that a host has validated arguments against `inputSchema`. Required fields are therefore re-checked
 * here with the same zod validators, and an MCP error result is returned when they are missing or
 * malformed. Coercing with `String(...)` would instead capture or look up the literal text
 * "undefined" / "[object Object]".
 *
 * @param deps.server - Host that receives the two tool registrations.
 * @param deps.memoryCapture - Port used to start a capture and to read a run's status.
 * @param deps.userContext - Supplies the `userId` attached to every capture request.
 */
function registerMemoryCaptureTools(deps: {
  server: KloMcpServerLike;
  memoryCapture: MemoryCapturePort;
  userContext: KloMcpServerDeps['userContext'];
}): void {
  deps.server.registerTool(
    'memory_capture',
    {
      title: 'Memory Capture',
      description:
        'Capture durable knowledge and semantic-layer updates from the final user/assistant exchange. Returns a run id for polling.',
      inputSchema: memoryCaptureInputSchema,
    },
    async (input) => {
      const userMessage = memoryCaptureInputSchema.userMessage.safeParse(input.userMessage);
      if (!userMessage.success) {
        return jsonErrorToolResult('memory_capture requires a non-empty string "userMessage".');
      }
      const captureInput: MemoryAgentInput = {
        userId: deps.userContext.userId,
        // A fresh `mcp-`-prefixed chat id is generated for every call.
        chatId: `mcp-${randomUUID()}`,
        userMessage: userMessage.data,
        // Optional fields stay lenient: anything that is not a string is treated as absent.
        assistantMessage: typeof input.assistantMessage === 'string' ? input.assistantMessage : undefined,
        connectionId: typeof input.connectionId === 'string' ? input.connectionId : undefined,
        sourceType: 'external_ingest',
      };
      const result = await deps.memoryCapture.capture(captureInput);
      return jsonToolResult(result);
    },
  );
  deps.server.registerTool(
    'memory_capture_status',
    {
      title: 'Memory Capture Status',
      description: 'Read the current or final status for a memory capture run.',
      inputSchema: memoryCaptureStatusInputSchema,
    },
    async (input) => {
      const parsedRunId = memoryCaptureStatusInputSchema.runId.safeParse(input.runId);
      if (!parsedRunId.success) {
        return jsonErrorToolResult('memory_capture_status requires a non-empty string "runId".');
      }
      const runId = parsedRunId.data;
      const status = await deps.memoryCapture.status(runId);
      // The port returns a falsy value (the tests use null) for an unknown run.
      return status ? jsonToolResult(status) : jsonErrorToolResult(`Memory capture run "${runId}" was not found.`);
    },
  );
}
/**
 * Registers KLO tools on an existing server and returns that same server.
 *
 * Memory capture tools are registered only when `deps.memoryCapture` is provided, and context tools
 * only when `deps.contextTools` is provided. Memory tools are registered first.
 *
 * @param deps - The target server, the user context, and the optional ports.
 * @returns `deps.server`, after registration.
 */
export function createKloMcpServer(deps: KloMcpServerDeps): KloMcpServerDeps['server'] {
  const { server, userContext, memoryCapture, contextTools } = deps;

  if (memoryCapture) {
    registerMemoryCaptureTools({ server, memoryCapture, userContext });
  }

  if (contextTools) {
    registerKloContextTools({ server, ports: contextTools, userContext });
  }

  return server;
}
/**
 * Creates an SDK `McpServer` and registers KLO tools on it.
 *
 * @param deps - Same as `KloMcpServerDeps` without `server`, plus an optional `name`
 * (falls back to `'klo'`) and `version` (falls back to `'0.0.0-private'`).
 * @returns The newly constructed server with the tools registered.
 */
export function createDefaultKloMcpServer(
  deps: Omit<KloMcpServerDeps, 'server'> & { name?: string; version?: string },
): McpServer {
  const serverInfo = {
    name: deps.name ?? 'klo',
    version: deps.version ?? '0.0.0-private',
  };
  const server = new McpServer(serverInfo);

  const { memoryCapture, userContext, contextTools } = deps;
  createKloMcpServer({
    // Registration goes through the minimal `KloMcpServerLike` view of the SDK server.
    server: server as KloMcpServerLike,
    memoryCapture,
    userContext,
    contextTools,
  });

  return server;
}

View file

@ -0,0 +1,326 @@
import type { IngestReportSnapshot, MemoryFlowReplayInput } from '../ingest/index.js';
import type { MemoryCaptureService } from '../memory/index.js';
import type { KloScanMode, KloScanReport } from '../scan/index.js';
import type {
SemanticLayerQueryInput,
SlDictionaryMatch,
SlSearchLaneSummary,
SlSearchMatchReason,
} from '../sl/index.js';
import type { WikiSearchLaneSummary, WikiSearchMatchReason } from '../wiki/index.js';
/** A text content item inside a tool result; `type` is always the literal `'text'`. */
export interface KloMcpTextContent {
type: 'text';
text: string;
}
/**
 * Result object returned by tool handlers.
 *
 * @typeParam T - Shape of the optional machine-readable payload.
 */
export interface KloMcpToolResult<T extends object = object> {
/** Text items describing the result. */
content: KloMcpTextContent[];
/** Optional structured payload. */
structuredContent?: T;
/** Present only on failures; it can only be `true`, never `false`. */
isError?: true;
}
/** The two `MemoryCaptureService` methods the MCP layer depends on, with identical signatures. */
export interface MemoryCapturePort {
capture: MemoryCaptureService['capture'];
status: MemoryCaptureService['status'];
}
/** Identity of the caller on whose behalf tools run. */
export interface KloMcpUserContext {
userId: string;
}
/**
 * Minimal structural view of a server that tools can be registered on.
 * Only `registerTool` is required, so a test double or the SDK server cast to this type can be used.
 */
export interface KloMcpServerLike {
/**
 * @param name - Tool name.
 * @param config - Optional title/description plus the tool's input schema (opaque to this type).
 * @param handler - Async function invoked with the tool arguments.
 */
registerTool(
name: string,
config: {
title?: string;
description?: string;
inputSchema: unknown;
},
handler: (input: Record<string, unknown>) => Promise<unknown>,
): void;
}
/** One entry in a connection listing. */
export interface KloConnectionSummary {
id: string;
name: string;
/** Free-form string; this type does not constrain the set of values or their casing. */
connectionType: string;
}
/** Outcome of testing a single connection. */
export interface KloConnectionTestResponse {
id: string;
connectionType: string;
/** Whether the test passed. */
ok: boolean;
/** Number of tables, or `null` when no count is available. */
tableCount: number | null;
message: string;
warnings: string[];
}
/** Port behind the connection tools. `list` is required; `test` is optional. */
export interface KloConnectionsMcpPort {
list(): Promise<KloConnectionSummary[]>;
/** May resolve `null` instead of a test result. */
test?(input: { connectionId: string }): Promise<KloConnectionTestResponse | null>;
}
/** One hit in a knowledge search. */
export interface KloKnowledgeSearchResult {
key: string;
path: string;
scope: 'GLOBAL' | 'USER';
summary: string;
score: number;
/** Optional explanation of why the page matched. */
matchReasons?: WikiSearchMatchReason[];
/** Optional per-lane search summaries. */
lanes?: WikiSearchLaneSummary[];
}
/** Response of a knowledge search: the returned hits plus a separately reported total. */
export interface KloKnowledgeSearchResponse {
results: KloKnowledgeSearchResult[];
totalFound: number;
}
/** A knowledge page as returned by a read. */
export interface KloKnowledgePage {
key: string;
summary: string;
content: string;
scope: 'GLOBAL' | 'USER';
tags?: string[];
refs?: string[];
/** NOTE(review): presumably names of semantic-layer sources referenced by the page — confirm. */
slRefs?: string[];
}
/**
 * Usage statistics that can accompany a knowledge write (see `KloKnowledgeMcpPort.write`).
 * Field names are snake_case. Not exported; used only within this module's types.
 */
interface KloHistoricSqlKnowledgeUsage {
executions: number;
distinct_users: number;
first_seen: string;
last_seen: string;
/** `null` when no value is available. */
p50_runtime_ms: number | null;
/** `null` when no value is available. */
p95_runtime_ms: number | null;
error_rate: number;
rows_produced?: number;
}
/** Result of a knowledge write; `action` tells whether the page was created or updated. */
export interface KloKnowledgeWriteResponse {
success: boolean;
key: string;
action: 'created' | 'updated';
}
/** Port behind the knowledge tools. Every call carries the acting `userId`. */
export interface KloKnowledgeMcpPort {
search(input: { userId: string; query: string; limit: number }): Promise<KloKnowledgeSearchResponse>;
/** May resolve `null` instead of a page. */
read(input: { userId: string; key: string }): Promise<KloKnowledgePage | null>;
/** `key`, `summary` and `content` are required; all remaining fields are optional metadata. */
write(input: {
userId: string;
key: string;
summary: string;
content: string;
tags?: string[];
refs?: string[];
slRefs?: string[];
source?: string;
intent?: string;
tables?: string[];
representativeSql?: string;
usage?: KloHistoricSqlKnowledgeUsage;
fingerprints?: string[];
}): Promise<KloKnowledgeWriteResponse>;
}
/** Summary of one semantic-layer source in a listing. */
export interface KloSemanticLayerSourceSummary {
connectionId: string;
connectionName: string;
name: string;
description?: string;
columnCount: number;
measureCount: number;
joinCount: number;
/** The remaining fields are optional search metadata. */
score?: number;
matchReasons?: SlSearchMatchReason[];
dictionaryMatches?: SlDictionaryMatch[];
lanes?: SlSearchLaneSummary[];
}
/** Response of a source listing: the returned summaries plus a separately reported total. */
export interface KloSemanticLayerListResponse {
sources: KloSemanticLayerSourceSummary[];
totalSources: number;
}
/** A semantic-layer source returned as YAML text. */
export interface KloSemanticLayerReadResponse {
sourceName: string;
yaml: string;
}
/** Result of writing a semantic-layer source. Only `success` and `sourceName` are always present. */
export interface KloSemanticLayerWriteResponse {
success: boolean;
sourceName: string;
yaml?: string;
errors?: string[];
warnings?: string[];
commitHash?: string;
}
/** Result of validating semantic-layer sources; `errors` and `warnings` are always present (possibly empty). */
export interface KloSemanticLayerValidationResponse {
success: boolean;
errors: string[];
warnings: string[];
}
/** Result of a semantic-layer query. */
export interface KloSemanticLayerQueryResponse {
sql: string;
headers: string[];
/** Rows as arrays of untyped cell values. */
rows: unknown[][];
totalRows: number;
/** Optional, loosely typed plan details. */
plan?: Record<string, unknown>;
}
/** Port behind the semantic-layer (`sl_*`) tools. */
export interface KloSemanticLayerMcpPort {
/** Both filters are optional. */
listSources(input: { connectionId?: string; query?: string }): Promise<KloSemanticLayerListResponse>;
/** May resolve `null` instead of a source. */
readSource(input: { connectionId: string; sourceName: string }): Promise<KloSemanticLayerReadResponse | null>;
/** Accepts the source as YAML text (`yaml`), as an object (`source`), or a `delete` flag; all three are optional at the type level. */
writeSource(input: {
connectionId: string;
sourceName: string;
yaml?: string;
source?: Record<string, unknown>;
delete?: boolean;
}): Promise<KloSemanticLayerWriteResponse>;
/** `names` optionally lists specific sources. */
validate(input: { connectionId: string; names?: string[] }): Promise<KloSemanticLayerValidationResponse>;
query(input: { connectionId?: string; query: SemanticLayerQueryInput }): Promise<KloSemanticLayerQueryResponse>;
}
/** The accepted values for what initiated an ingest run. */
export type KloIngestTriggerKind = 'upload' | 'scheduled_pull' | 'manual_resync';
/** One child run of a fanned-out ingest trigger (see `KloIngestTriggerResponse.fanout`). Not exported. */
interface KloIngestTriggerFanoutChild {
runId: string;
jobId: string;
reportId: string;
targetConnectionId: string;
metabaseDatabaseId: number;
}
/** Result of triggering an ingest. Only `runId` is always present. */
export interface KloIngestTriggerResponse {
runId: string;
jobId?: string;
reportId?: string;
/** Present when the trigger produced several child runs; carries an aggregate status and the children. */
fanout?: {
status: 'all_succeeded' | 'partial_failure' | 'all_failed';
children: KloIngestTriggerFanoutChild[];
};
}
/** Counts per change category for an ingest run. */
export interface KloIngestDiffSummary {
added: number;
modified: number;
deleted: number;
unchanged: number;
}
/** Summary of one ingest work unit: its key and three lists of paths. */
export interface KloIngestWorkUnitSummary {
unitKey: string;
rawFiles: string[];
peerFileIndex: string[];
dependencyPaths: string[];
}
/**
 * Status of an ingest run. Only `runId`, `status` and `done` are required; every other field is
 * optional detail that a port may or may not supply.
 */
export interface KloIngestStatusResponse {
runId: string;
jobId?: string;
reportId?: string;
/** Free-form status string; this type does not constrain the set of values. */
status: string;
stage?: string;
progress?: number;
errors?: string[];
done: boolean;
adapter?: string;
connectionId?: string;
sourceDir?: string | null;
syncId?: string;
startedAt?: string;
completedAt?: string;
previousRunId?: string | null;
diffSummary?: KloIngestDiffSummary;
workUnitCount?: number;
rawFileCount?: number;
workUnits?: KloIngestWorkUnitSummary[];
evictionDeletedRawPaths?: string[];
}
/**
 * Ingest operations made available to the MCP context tools
 * (wired in through `KloMcpContextPorts.ingest`).
 *
 * `trigger` and `status` are mandatory; `report` and `replay` are optional
 * methods an implementation may leave out.
 */
export interface KloIngestMcpPort {
  /**
   * Starts an ingest for the given adapter and connection.
   * `config` is optional and untyped (`unknown`).
   */
  trigger(input: {
    adapter: string;
    connectionId: string;
    config?: unknown;
    trigger: KloIngestTriggerKind;
  }): Promise<KloIngestTriggerResponse>;

  /**
   * Looks up the status of a run.
   * Resolves to `null` — presumably for an unknown `runId`; confirm against
   * the implementation.
   */
  status(input: { runId: string }): Promise<KloIngestStatusResponse | null>;

  /** Optional: resolves to the report snapshot of a run, or `null`. */
  report?(input: { runId: string }): Promise<IngestReportSnapshot | null>;

  /** Optional: resolves to the replay input of a run, or `null`. */
  replay?(input: { runId: string }): Promise<MemoryFlowReplayInput | null>;
}
/**
 * Payload resolved by `KloScanMcpPort.trigger`.
 *
 * `status` and `done` are typed as the literals `'done'` and `true`, so this
 * shape can only describe a completed scan; the report is included directly.
 */
interface KloScanTriggerResponse {
  /** Identifier of the scan run. */
  runId: string;
  /** Sync identifier of the scan. */
  syncId: string;
  /** Identifier of the scanned connection. */
  connectionId: string;
  /** Always the literal `'done'`. */
  status: 'done';
  /** Always the literal `true`. */
  done: true;
  /** Scan mode of the run. */
  mode: KloScanMode;
  /** Dry-run flag of the run. */
  dryRun: boolean;
  /** Report of the scan. */
  report: KloScanReport;
}
/**
 * Status snapshot resolved by `KloScanMcpPort.status`.
 * Every field is required; only `reportPath` may be `null`.
 */
interface KloScanStatusResponse {
  /** Identifier of the scan run. */
  runId: string;
  /** Sync identifier of the scan. */
  syncId: string;
  /** Identifier of the scanned connection. */
  connectionId: string;
  /** Status label as a free-form string. */
  status: string;
  /** Flag marking the run as finished. */
  done: boolean;
  /** Numeric progress (scale not visible here — TODO confirm). */
  progress: number;
  /** Scan mode of the run. */
  mode: KloScanMode;
  /** Dry-run flag of the run. */
  dryRun: boolean;
  /** Start timestamp as a string (format not visible here). */
  startedAt: string;
  /** Completion timestamp as a string (format not visible here). */
  completedAt: string;
  /** Path of the report, or `null`. */
  reportPath: string | null;
  /** Warnings, typed identically to the `warnings` member of `KloScanReport`. */
  warnings: KloScanReport['warnings'];
}
/** Allowed values for the `type` field of `KloScanArtifactSummary`. */
export type KloScanArtifactType =
  | 'report'
  | 'raw_source'
  | 'manifest_shard'
  | 'enrichment_artifact';
/**
 * Descriptor of a single scan artifact, without its content.
 * Used as the element type of `KloScanArtifactListResponse.artifacts`.
 */
export interface KloScanArtifactSummary {
  /** Kind of the artifact. */
  type: KloScanArtifactType;
  /** Path of the artifact. */
  path: string;
  /** Optional size (unit not visible here — presumably bytes; TODO confirm). */
  size?: number;
}
/** Payload resolved by the optional `KloScanMcpPort.listArtifacts` method. */
export interface KloScanArtifactListResponse {
  /** Identifier of the scan run. */
  runId: string;
  /** Artifact descriptors returned for that run. */
  artifacts: KloScanArtifactSummary[];
}
/**
 * Payload resolved by the optional `KloScanMcpPort.readArtifact` method:
 * all `KloScanArtifactSummary` fields plus the run id and the content.
 */
export interface KloScanArtifactReadResponse extends KloScanArtifactSummary {
  /** Identifier of the scan run. */
  runId: string;
  /** Artifact content as a string. */
  content: string;
}
/**
 * Scan operations made available to the MCP context tools
 * (wired in through `KloMcpContextPorts.scan`).
 *
 * `trigger`, `status` and `report` are mandatory; `listArtifacts` and
 * `readArtifact` are optional methods an implementation may leave out.
 */
export interface KloScanMcpPort {
  /**
   * Runs a scan for a connection. `mode` may be omitted, while
   * `detectRelationships` and `dryRun` must always be supplied.
   */
  trigger(input: {
    connectionId: string;
    mode?: KloScanMode;
    detectRelationships: boolean;
    dryRun: boolean;
  }): Promise<KloScanTriggerResponse>;

  /**
   * Looks up the status of a scan run.
   * Resolves to `null` — presumably for an unknown `runId`; confirm against
   * the implementation.
   */
  status(input: { runId: string }): Promise<KloScanStatusResponse | null>;

  /** Resolves to the report of a scan run, or `null`. */
  report(input: { runId: string }): Promise<KloScanReport | null>;

  /** Optional: resolves to the artifact list of a scan run, or `null`. */
  listArtifacts?(input: { runId: string }): Promise<KloScanArtifactListResponse | null>;

  /** Optional: resolves to one artifact, addressed by run id and path, or `null`. */
  readArtifact?(input: {
    runId: string;
    path: string;
  }): Promise<KloScanArtifactReadResponse | null>;
}
/**
 * Bundle of ports handed to the MCP context tools.
 * Every port is optional, so callers can supply any subset.
 */
export interface KloMcpContextPorts {
  /** Connections port. */
  connections?: KloConnectionsMcpPort;
  /** Knowledge port. */
  knowledge?: KloKnowledgeMcpPort;
  /** Semantic layer port. */
  semanticLayer?: KloSemanticLayerMcpPort;
  /** Ingest port. */
  ingest?: KloIngestMcpPort;
  /** Scan port. */
  scan?: KloScanMcpPort;
}
/**
 * Dependency bundle for the MCP server setup.
 * `server` and `userContext` are mandatory; `memoryCapture` and
 * `contextTools` are optional.
 */
export interface KloMcpServerDeps {
  /** Server object. */
  server: KloMcpServerLike;
  /** User context. */
  userContext: KloMcpUserContext;
  /** Optional memory capture port. */
  memoryCapture?: MemoryCapturePort;
  /** Optional ports backing the context tools. */
  contextTools?: KloMcpContextPorts;
}