feat(mcp): add MCP server (#97)

* docs(specs): design research-agent MCP tools and ktx mcp daemon

Adds the 2026-05-14 design spec for exposing four new MCP tools
(discover_data, entity_details, dictionary_search, sql_execution),
shipping a ktx-research skill, and introducing an HTTP-only ktx mcp
daemon so external agents can use KTX as a research-capable context
layer.

* Refine research-agent MCP tools spec after adversarial review iteration 1

* Refine research-agent MCP tools spec after adversarial review iteration 2

* Refine research-agent MCP tools spec after adversarial review iteration 3

* Refine spec: drop connectionName compat carve-out and ground summary/snippet provenance per kind

* feat(daemon): validate read-only SQL with sqlglot

* feat(context): expose read-only SQL validation port

* feat(context): register MCP sql execution tool

* feat(context): execute MCP SQL through validated connector path

* test(context): update SQL analysis port fixtures

* docs: add research-agent MCP sql execution foundation plan

* feat(context): add scan-backed entity details service

* feat(context): register MCP entity details tool

* feat(context): expose local MCP entity details

* test(context): align entity details scan fixtures

* docs: add research-agent MCP entity_details plan

* feat(context): add dictionary search service

* feat(context): register MCP dictionary search tool

* feat(context): expose local MCP dictionary search

* docs: add research-agent MCP dictionary_search plan

* feat: add MCP discover data service

* feat: expose discover data MCP tool

* feat: wire local discover data MCP port

* docs: add research-agent MCP discover_data plan

* feat(cli): add mcp http security helpers

* feat(cli): host mcp over streamable http

* feat(cli): manage mcp daemon lifecycle

* feat(cli): add ktx mcp commands

* fix(cli): stabilize mcp daemon verification

* docs: add research-agent MCP http daemon plan

* feat(cli): install KTX research skill

* feat(cli): configure MCP clients in setup agents

* feat(cli): support Claude local MCP setup scope

* docs: add research-agent MCP setup-agents plan

* refactor(context): use connectionId in warehouse verification tools

* docs(context): update ingest verification prompts for connectionId

* docs: add research-agent MCP ingest contract convergence plan

* chore: build runtime artifacts in conductor setup

---------

Co-authored-by: Andrey Avtomonov <7889985+andreybavt@users.noreply.github.com>
This commit is contained in:
Andrey Avtomonov 2026-05-15 02:35:09 +02:00 committed by GitHub
parent c7b64379bf
commit b759a4a286
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
78 changed files with 13689 additions and 190 deletions

View file

@ -143,6 +143,45 @@ const scanArtifactReadSchema = z.object({
path: z.string().min(1),
});
/**
 * Structured table reference accepted by `entityDetailsSchema` as an
 * alternative to a plain table-name string.
 *
 * `catalog` and `db` are nullable strings (not marked optional), and `name`
 * must be a non-empty string.
 */
const entityDetailsTableRefSchema = z.object({
catalog: z.string().nullable(),
db: z.string().nullable(),
name: z.string().min(1),
});
/**
 * Input schema for the `entity_details` MCP tool.
 *
 * Requires a `connectionId` and an `entities` array holding between 1 and 20
 * items. Each item names a table and may carry an optional list of non-empty
 * column names.
 */
const entityDetailsSchema = z.object({
connectionId: connectionIdSchema,
entities: z
.array(
z.object({
// Either a non-empty table-name string or a structured catalog/db/name reference.
table: z.union([z.string().min(1), entityDetailsTableRefSchema]),
columns: z.array(z.string().min(1)).optional(),
}),
)
.min(1)
.max(20),
});
/**
 * Input schema for the `dictionary_search` MCP tool.
 *
 * `values` must contain between 1 and 20 non-empty strings; `connectionId` is
 * optional.
 */
const dictionarySearchSchema = z.object({
values: z.array(z.string().min(1)).min(1).max(20),
connectionId: connectionIdSchema.optional(),
});
/** Allowed values for the entries of the `kinds` array in `discoverDataSchema`. */
const discoverDataKindSchema = z.enum(['wiki', 'sl_source', 'sl_measure', 'sl_dimension', 'table', 'column']);
/**
 * Input schema for the `discover_data` MCP tool.
 *
 * `query` must be a non-empty string. `connectionId` and `kinds` are optional.
 * `limit`, when supplied, must be an integer between 1 and 50.
 */
const discoverDataSchema = z.object({
query: z.string().min(1),
connectionId: connectionIdSchema.optional(),
kinds: z.array(discoverDataKindSchema).optional(),
// NOTE(review): `.default(15)` is wrapped by `.optional()`. Whether an omitted
// `limit` parses to 15 or stays `undefined` differs between zod major versions —
// confirm against the installed zod before relying on the default being applied.
limit: z.number().int().min(1).max(50).default(15).optional(),
});
/**
 * Input schema for the `sql_execution` MCP tool.
 *
 * Requires a `connectionId` and a non-empty `sql` string. `maxRows`, when
 * supplied, must be an integer between 1 and 10,000.
 */
const sqlExecutionSchema = z.object({
connectionId: connectionIdSchema,
sql: z.string().min(1),
// NOTE(review): `.default(1000)` is wrapped by `.optional()`, so whether the
// default is applied at parse time depends on the zod major version — confirm.
// The `sql_execution` handler in this file also falls back with `?? 1000`.
maxRows: z.number().int().min(1).max(10_000).default(1000).optional(),
});
export function jsonToolResult<T extends object>(structuredContent: T): KtxMcpToolResult<T> {
return {
content: [{ type: 'text', text: JSON.stringify(structuredContent, null, 2) }],
@ -361,6 +400,81 @@ export function registerKtxContextTools(deps: RegisterKtxContextToolsDeps): void
);
}
// Register the `entity_details` tool only when `ports.entityDetails` is provided.
if (ports.entityDetails) {
  // Hold the narrowed port in a local so the async handler sees a defined value.
  const entityDetailsPort = ports.entityDetails;
  registerParsedTool(
    server,
    'entity_details',
    {
      title: 'Entity Details',
      description: 'Read raw table and column metadata from the latest KTX live-database scan snapshot.',
      inputSchema: entityDetailsSchema.shape,
    },
    entityDetailsSchema,
    async (input) => {
      // Pass the parsed input to the port and wrap its result as a JSON tool result.
      const details = await entityDetailsPort.read(input);
      return jsonToolResult(details);
    },
  );
}
// Register the `dictionary_search` tool only when `ports.dictionarySearch` is provided.
if (ports.dictionarySearch) {
  // Hold the narrowed port in a local so the async handler sees a defined value.
  const dictionarySearchPort = ports.dictionarySearch;
  registerParsedTool(
    server,
    'dictionary_search',
    {
      title: 'Dictionary Search',
      description:
        'Search profile-sampled warehouse values and report matching connection/source/column locations plus non-authoritative miss reasons.',
      inputSchema: dictionarySearchSchema.shape,
    },
    dictionarySearchSchema,
    async (input) => {
      // Pass the parsed input to the port and wrap its result as a JSON tool result.
      const matches = await dictionarySearchPort.search(input);
      return jsonToolResult(matches);
    },
  );
}
// Register the `discover_data` tool only when `ports.discover` is provided.
if (ports.discover) {
  // Hold the narrowed port in a local so the async handler sees a defined value.
  const discoverPort = ports.discover;
  registerParsedTool(
    server,
    'discover_data',
    {
      title: 'Discover Data',
      description:
        'Search across KTX wiki pages, semantic-layer sources/measures/dimensions, and raw warehouse schema refs.',
      inputSchema: discoverDataSchema.shape,
    },
    discoverDataSchema,
    async (input) => {
      // Pass the parsed input to the port and wrap its result as a JSON tool result.
      const hits = await discoverPort.search(input);
      return jsonToolResult(hits);
    },
  );
}
// Register the `sql_execution` tool only when `ports.sqlExecution` is provided.
if (ports.sqlExecution) {
  // Hold the narrowed port in a local so the async handler sees a defined value.
  const sqlExecutionPort = ports.sqlExecution;
  registerParsedTool(
    server,
    'sql_execution',
    {
      title: 'SQL Execution',
      description:
        'Execute one parser-validated read-only SQL query against a configured KTX connection and return structured rows.',
      inputSchema: sqlExecutionSchema.shape,
    },
    sqlExecutionSchema,
    async (input) => {
      try {
        const { connectionId, sql, maxRows } = input;
        const outcome = await sqlExecutionPort.execute({
          connectionId,
          sql,
          // Fall back to 1000 rows when the caller did not supply `maxRows`.
          maxRows: maxRows ?? 1000,
        });
        // Wrapping stays inside the try so any failure here is also reported as an error result.
        return jsonToolResult(outcome);
      } catch (error) {
        // Convert anything thrown above into an error tool result rather than rethrowing.
        const message = error instanceof Error ? error.message : String(error);
        return jsonErrorToolResult(message);
      }
    },
  );
}
if (ports.ingest) {
const ingest = ports.ingest;
registerParsedTool(