ktx/packages/context/src/sl/dictionary-search.ts
Andrey Avtomonov b759a4a286
feat(mcp): added MCP server (#97)
* docs(specs): design research-agent MCP tools and ktx mcp daemon

Adds the 2026-05-14 design spec for exposing four new MCP tools
(discover_data, entity_details, dictionary_search, sql_execution),
shipping a ktx-research skill, and introducing an HTTP-only ktx mcp
daemon so external agents can use KTX as a research-capable context
layer.

* Refine research-agent MCP tools spec after adversarial review iteration 1

* Refine research-agent MCP tools spec after adversarial review iteration 2

* Refine research-agent MCP tools spec after adversarial review iteration 3

* Refine spec: drop connectionName compat carve-out and ground summary/snippet provenance per kind

* feat(daemon): validate read-only SQL with sqlglot

* feat(context): expose read-only SQL validation port

* feat(context): register MCP sql execution tool

* feat(context): execute MCP SQL through validated connector path

* test(context): update SQL analysis port fixtures

* docs: add research-agent MCP sql execution foundation plan

* feat(context): add scan-backed entity details service

* feat(context): register MCP entity details tool

* feat(context): expose local MCP entity details

* test(context): align entity details scan fixtures

* docs: add research-agent MCP entity_details plan

* feat(context): add dictionary search service

* feat(context): register MCP dictionary search tool

* feat(context): expose local MCP dictionary search

* docs: add research-agent MCP dictionary_search plan

* feat: add MCP discover data service

* feat: expose discover data MCP tool

* feat: wire local discover data MCP port

* docs: add research-agent MCP discover_data plan

* feat(cli): add mcp http security helpers

* feat(cli): host mcp over streamable http

* feat(cli): manage mcp daemon lifecycle

* feat(cli): add ktx mcp commands

* fix(cli): stabilize mcp daemon verification

* docs: add research-agent MCP http daemon plan

* feat(cli): install KTX research skill

* feat(cli): configure MCP clients in setup agents

* feat(cli): support Claude local MCP setup scope

* docs: add research-agent MCP setup-agents plan

* refactor(context): use connectionId in warehouse verification tools

* docs(context): update ingest verification prompts for connectionId

* docs: add research-agent MCP ingest contract convergence plan

* chore: build runtime artifacts in conductor setup

---------

Co-authored-by: Andrey Avtomonov <7889985+andreybavt@users.noreply.github.com>
2026-05-15 02:35:09 +02:00

214 lines
6.8 KiB
TypeScript

import type { KtxLocalProject } from '../project/index.js';
import { loadLatestSlDictionaryEntries, type SlDictionaryEntry } from './sl-dictionary-profile.js';
/**
 * Search readiness of one connection. `ready` is reported when a relationship-profile artifact was
 * found and at least one profiled column has dictionary entries; `no_profile_artifact` when no
 * artifact path was found; `no_candidate_columns` when an artifact exists but no column has entries.
 */
export type KtxDictionarySearchStatus = 'ready' | 'no_profile_artifact' | 'no_candidate_columns';
/**
 * Why a requested value has no match on a connection. A connection that is not `ready` reports its
 * status as the reason; a `ready` connection reports `value_not_in_sample`.
 */
export type KtxDictionarySearchMissReason = 'no_profile_artifact' | 'no_candidate_columns' | 'value_not_in_sample';
/** Request accepted by the dictionary search service. */
export interface KtxDictionarySearchInput {
/** Values to look up; the response carries one result per element, in the same order. */
values: string[];
/**
 * Restricts the search to this connection. When omitted (or an empty string), every key of
 * `project.config.connections` is searched.
 */
connectionId?: string;
}
/** Metadata about the profile artifact and dictionary entries a connection's search is based on. */
export interface KtxDictionarySearchCoverage {
/** The artifact's `profileSampleRows` when it is a finite number, otherwise null. */
sampledRows: number | null;
/** The artifact's `sampleValuesPerColumn` when it is a finite number, otherwise null. */
valuesPerColumn: number | null;
/** Number of distinct (sourceName, columnName) pairs among the connection's dictionary entries. */
profiledColumns: number;
/** Third-from-last segment of the artifact path, or null when there is no artifact. */
syncId: string | null;
/** The artifact's `profiledAt`, falling back to `extractedAt`; null when neither is a non-blank string. */
profiledAt: string | null;
}
/** Per-connection summary of what the search was able to consult. */
export interface KtxDictionarySearchSearchedConnection {
/** Identifier of the connection that was searched. */
connectionId: string;
/** Profile/dictionary coverage behind this connection's results. */
coverage: KtxDictionarySearchCoverage;
/** Whether the connection had searchable dictionary data, or why it did not. */
status: KtxDictionarySearchStatus;
}
/** One dictionary entry whose value matched a requested value. All fields are copied from the entry. */
export interface KtxDictionarySearchMatch {
/** Connection the matching entry belongs to. */
connectionId: string;
/** The entry's `sourceName`. */
sourceName: string;
/** The entry's `columnName`. */
columnName: string;
/** The entry's stored value, in its original casing (matching is case-insensitive substring). */
matchedValue: string;
/** The entry's `cardinality`; presumably the column's distinct-value count — TODO confirm against sl-dictionary-profile. */
cardinality: number | null;
}
/** Records that a searched connection produced no match for a requested value. */
export interface KtxDictionarySearchMiss {
/** Connection that had no matching entry. */
connectionId: string;
/** Why nothing matched on that connection. */
reason: KtxDictionarySearchMissReason;
}
/** Outcome of looking up a single requested value across the searched connections. */
export interface KtxDictionarySearchValueResult {
/** The requested value, exactly as supplied in the input. */
value: string;
/** Matching entries, ordered by connectionId, sourceName, columnName, then matchedValue. */
matches: KtxDictionarySearchMatch[];
/** One entry per searched connection that contributed no match for this value. */
misses: KtxDictionarySearchMiss[];
}
/** Full response of a dictionary search. */
export interface KtxDictionarySearchResponse {
/** Every connection that was considered, ordered by connectionId. */
searched: KtxDictionarySearchSearchedConnection[];
/** One result per requested value, in input order. */
results: KtxDictionarySearchValueResult[];
}
/**
 * Fields of the parsed `relationship-profile.json` artifact that this module looks at. Most are typed
 * `unknown` because the JSON is only checked to be a non-array object, not schema-validated; each
 * field is narrowed at the point of use.
 */
interface RelationshipProfileArtifact {
// Declared but not read by the code visible in this file.
connectionId?: string;
// Surfaced as coverage.sampledRows when it is a finite number.
profileSampleRows?: unknown;
// Surfaced as coverage.valuesPerColumn when it is a finite number.
sampleValuesPerColumn?: unknown;
// Preferred source for coverage.profiledAt.
profiledAt?: unknown;
// Fallback for coverage.profiledAt when profiledAt is missing or blank.
extractedAt?: unknown;
}
/**
 * Deduplicates the given strings, drops those that are empty or whitespace-only, and returns the
 * remainder ordered by `localeCompare`. Kept values are returned untrimmed.
 */
function uniqueSorted(values: Iterable<string>): string[] {
  const distinct = new Set<string>();
  for (const candidate of values) {
    if (candidate.trim().length > 0) {
      distinct.add(candidate);
    }
  }
  const ordered = Array.from(distinct);
  ordered.sort((a, b) => a.localeCompare(b));
  return ordered;
}
/**
 * Extracts the third-from-last `/`-separated segment of a profile artifact path, which this module
 * treats as the sync id (the segment right before `enrichment/relationship-profile.json`).
 *
 * @param path Path of a relationship-profile artifact.
 * @returns That segment, or null when the path has fewer than three segments.
 */
function latestProfileSyncId(path: string): string | null {
  const segments = path.split('/');
  const syncSegment = segments[segments.length - 3];
  return syncSegment ?? null;
}
/** Returns `value` when it is a finite number; any other input (including NaN and ±Infinity) yields null. */
function optionalNumber(value: unknown): number | null {
  if (typeof value !== 'number') {
    return null;
  }
  return Number.isFinite(value) ? value : null;
}
/**
 * Returns `value` unchanged when it is a string containing at least one non-whitespace character;
 * non-strings and blank strings yield null. The returned string is not trimmed.
 */
function optionalString(value: unknown): string | null {
  if (typeof value !== 'string') {
    return null;
  }
  const isBlank = value.trim().length === 0;
  return isBlank ? null : value;
}
/**
 * Finds the newest relationship-profile artifact stored for a connection.
 *
 * Lists `raw-sources/<connectionId>/live-database`, keeps the paths ending in
 * `/enrichment/relationship-profile.json`, and returns the one that sorts last under
 * `localeCompare`.
 *
 * @param project Project whose file store is listed.
 * @param connectionId Connection whose artifacts are looked up.
 * @returns The selected path, or null when listing fails or no artifact path is present.
 */
async function latestProfilePath(project: KtxLocalProject, connectionId: string): Promise<string | null> {
  const artifactSuffix = '/enrichment/relationship-profile.json';
  let listed: string[];
  try {
    const listing = await project.fileStore.listFiles(`raw-sources/${connectionId}/live-database`);
    listed = listing.files;
  } catch {
    // A failed listing is treated the same as "no artifact".
    return null;
  }

  const artifactPaths = listed.filter((candidate) => candidate.endsWith(artifactSuffix));
  artifactPaths.sort((a, b) => a.localeCompare(b));
  return artifactPaths[artifactPaths.length - 1] ?? null;
}
/**
 * Reads and JSON-parses a relationship-profile artifact.
 *
 * @param project Project whose file store holds the artifact.
 * @param path Path of the artifact to read.
 * @returns The parsed value when it is a non-null, non-array object; otherwise an empty object.
 *   No further schema validation is performed.
 * @throws Whatever `readFile` rejects with, or the `SyntaxError` raised by `JSON.parse` on malformed content.
 */
async function readProfile(project: KtxLocalProject, path: string): Promise<RelationshipProfileArtifact> {
  const { content } = await project.fileStore.readFile(path);
  const decoded: unknown = JSON.parse(content);
  const isPlainObject = typeof decoded === 'object' && decoded !== null && !Array.isArray(decoded);
  if (!isPlainObject) {
    return {};
  }
  return decoded as RelationshipProfileArtifact;
}
/**
 * Counts the distinct (sourceName, columnName) pairs present in the given dictionary entries.
 * The two names are joined with the U+001F unit separator to form the uniqueness key.
 */
function profiledColumnCount(entries: readonly SlDictionaryEntry[]): number {
  const seenColumns = new Set<string>();
  for (const { sourceName, columnName } of entries) {
    seenColumns.add(`${sourceName}\u001f${columnName}`);
  }
  return seenColumns.size;
}
/**
 * Builds the per-connection search summary (coverage and status).
 *
 * @param project Project whose file store holds the profile artifacts.
 * @param connectionId Connection being summarised.
 * @param entries Dictionary entries belonging to this connection.
 * @returns `no_profile_artifact` with empty coverage when no artifact path is found; otherwise
 *   coverage read from the latest artifact, with status `ready` if at least one column has
 *   entries and `no_candidate_columns` if none does.
 */
async function searchedConnection(
  project: KtxLocalProject,
  connectionId: string,
  entries: readonly SlDictionaryEntry[],
): Promise<KtxDictionarySearchSearchedConnection> {
  const artifactPath = await latestProfilePath(project, connectionId);
  if (!artifactPath) {
    const emptyCoverage = {
      sampledRows: null,
      valuesPerColumn: null,
      profiledColumns: 0,
      syncId: null,
      profiledAt: null,
    };
    return { connectionId, coverage: emptyCoverage, status: 'no_profile_artifact' };
  }

  const artifact = await readProfile(project, artifactPath);
  const profiledColumns = profiledColumnCount(entries);
  const coverage = {
    sampledRows: optionalNumber(artifact.profileSampleRows),
    valuesPerColumn: optionalNumber(artifact.sampleValuesPerColumn),
    profiledColumns,
    syncId: latestProfileSyncId(artifactPath),
    // Prefer the profiling timestamp; fall back to the extraction timestamp.
    profiledAt: optionalString(artifact.profiledAt) ?? optionalString(artifact.extractedAt),
  };
  if (profiledColumns > 0) {
    return { connectionId, coverage, status: 'ready' };
  }
  return { connectionId, coverage, status: 'no_candidate_columns' };
}
/**
 * Case-insensitive substring test of a requested value against a dictionary entry's value.
 *
 * @param entry Dictionary entry whose `value` is inspected.
 * @param value Requested search value.
 * @returns True when the entry's value contains `value`, ignoring case. An empty or
 *   whitespace-only `value` never matches.
 */
function entryMatchesValue(entry: SlDictionaryEntry, value: string): boolean {
  const needle = value.toLowerCase();
  // `includes('')` is always true, so without this guard a blank query would match every
  // entry of every connection.
  if (needle.trim().length === 0) {
    return false;
  }
  return entry.value.toLowerCase().includes(needle);
}
/**
 * Projects a dictionary entry onto the public match shape: the entry's `value` becomes
 * `matchedValue`, and the remaining fields are copied under their own names.
 */
function toMatch(entry: SlDictionaryEntry): KtxDictionarySearchMatch {
  const { connectionId, sourceName, columnName, value, cardinality } = entry;
  return { connectionId, sourceName, columnName, matchedValue: value, cardinality };
}
/**
 * Returns the matches ordered by connectionId, then sourceName, then columnName, then
 * matchedValue (each compared with `localeCompare`).
 *
 * @param matches Matches to order; this array is left untouched.
 * @returns A new, sorted array containing the same match objects.
 */
function sortMatches(matches: KtxDictionarySearchMatch[]): KtxDictionarySearchMatch[] {
  // Copy first: Array.prototype.sort works in place and would otherwise reorder the caller's array.
  return [...matches].sort(
    (left, right) =>
      left.connectionId.localeCompare(right.connectionId) ||
      left.sourceName.localeCompare(right.sourceName) ||
      left.columnName.localeCompare(right.columnName) ||
      left.matchedValue.localeCompare(right.matchedValue),
  );
}
/**
 * Maps a connection status to the reason reported for a miss: a `ready` connection simply did not
 * contain the value in its sample; any other status is itself the reason.
 */
function missReason(status: KtxDictionarySearchStatus): KtxDictionarySearchMissReason {
  if (status === 'ready') {
    return 'value_not_in_sample';
  }
  return status;
}
/**
 * Creates the dictionary search service for a local project.
 *
 * `search` resolves the target connections (the requested one, or every configured connection),
 * loads their latest dictionary entries, summarises each connection's coverage, and then, for
 * every requested value, reports the matching entries plus a miss for each searched connection
 * that had no match.
 *
 * @param project Project providing configuration, dictionary entries and profile artifacts.
 * @returns An object exposing the asynchronous `search` operation.
 */
export function createKtxDictionarySearchService(project: KtxLocalProject): {
  search(input: KtxDictionarySearchInput): Promise<KtxDictionarySearchResponse>;
} {
  async function search(input: KtxDictionarySearchInput): Promise<KtxDictionarySearchResponse> {
    const targetIds = input.connectionId
      ? [input.connectionId]
      : uniqueSorted(Object.keys(project.config.connections));
    const allEntries = await loadLatestSlDictionaryEntries(project, targetIds);

    // Bucket entries per target connection in one pass; entries for other connections are ignored here.
    const entriesByConnection = new Map<string, SlDictionaryEntry[]>(
      targetIds.map((id): [string, SlDictionaryEntry[]] => [id, []]),
    );
    for (const entry of allEntries) {
      entriesByConnection.get(entry.connectionId)?.push(entry);
    }

    const searched = await Promise.all(
      targetIds.map((id) => searchedConnection(project, id, entriesByConnection.get(id) ?? [])),
    );
    searched.sort((a, b) => a.connectionId.localeCompare(b.connectionId));

    const results = input.values.map((value) => {
      const matches = sortMatches(allEntries.filter((entry) => entryMatchesValue(entry, value)).map(toMatch));
      const connectionsWithMatch = new Set(matches.map((match) => match.connectionId));
      const misses = searched
        .filter((connection) => !connectionsWithMatch.has(connection.connectionId))
        .map((connection) => ({
          connectionId: connection.connectionId,
          reason: missReason(connection.status),
        }));
      return { value, matches, misses };
    });

    return { searched, results };
  }

  return { search };
}