feat(mcp): add MCP server (#97)

* docs(specs): design research-agent MCP tools and ktx mcp daemon

Adds the 2026-05-14 design spec for exposing four new MCP tools
(discover_data, entity_details, dictionary_search, sql_execution),
shipping a ktx-research skill, and introducing an HTTP-only ktx mcp
daemon so external agents can use KTX as a research-capable context
layer.

* Refine research-agent MCP tools spec after adversarial review iteration 1

* Refine research-agent MCP tools spec after adversarial review iteration 2

* Refine research-agent MCP tools spec after adversarial review iteration 3

* Refine spec: drop connectionName compat carve-out and ground summary/snippet provenance per kind

* feat(daemon): validate read-only SQL with sqlglot

* feat(context): expose read-only SQL validation port

* feat(context): register MCP sql execution tool

* feat(context): execute MCP SQL through validated connector path

* test(context): update SQL analysis port fixtures

* docs: add research-agent MCP sql execution foundation plan

* feat(context): add scan-backed entity details service

* feat(context): register MCP entity details tool

* feat(context): expose local MCP entity details

* test(context): align entity details scan fixtures

* docs: add research-agent MCP entity_details plan

* feat(context): add dictionary search service

* feat(context): register MCP dictionary search tool

* feat(context): expose local MCP dictionary search

* docs: add research-agent MCP dictionary_search plan

* feat: add MCP discover data service

* feat: expose discover data MCP tool

* feat: wire local discover data MCP port

* docs: add research-agent MCP discover_data plan

* feat(cli): add mcp http security helpers

* feat(cli): host mcp over streamable http

* feat(cli): manage mcp daemon lifecycle

* feat(cli): add ktx mcp commands

* fix(cli): stabilize mcp daemon verification

* docs: add research-agent MCP http daemon plan

* feat(cli): install KTX research skill

* feat(cli): configure MCP clients in setup agents

* feat(cli): support Claude local MCP setup scope

* docs: add research-agent MCP setup-agents plan

* refactor(context): use connectionId in warehouse verification tools

* docs(context): update ingest verification prompts for connectionId

* docs: add research-agent MCP ingest contract convergence plan

* chore: build runtime artifacts in conductor setup

---------

Co-authored-by: Andrey Avtomonov <7889985+andreybavt@users.noreply.github.com>
This commit is contained in:
Andrey Avtomonov 2026-05-15 02:35:09 +02:00 committed by GitHub
parent c7b64379bf
commit b759a4a286
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
78 changed files with 13689 additions and 190 deletions

View file

@ -51,7 +51,10 @@ from ktx_daemon.source_generation import (
from ktx_daemon.sql_analysis import (
AnalyzeSqlBatchRequest,
AnalyzeSqlBatchResponse,
ValidateReadOnlySqlRequest,
ValidateReadOnlySqlResponse,
analyze_sql_batch_response,
validate_read_only_sql_response,
)
from ktx_daemon.table_identifier import (
ParseTableIdentifierBatchRequest,
@ -198,6 +201,19 @@ def create_app(
detail=f"Table identifier parsing failed: {error}",
) from error
# Route: POST /sql/validate-read-only.
# Delegates to validate_read_only_sql_response and returns its result as-is.
# Any exception escaping the validator is logged with its traceback and
# surfaced to the client as an HTTP 500 carrying the error text.
# (Kept as comments: a docstring here would be published in the OpenAPI
# description of the route.)
@app.post("/sql/validate-read-only", response_model=ValidateReadOnlySqlResponse)
async def sql_validate_read_only(
    request: ValidateReadOnlySqlRequest,
) -> ValidateReadOnlySqlResponse:
    try:
        verdict = validate_read_only_sql_response(request)
    except Exception as error:
        logger.exception("SQL read-only validation failed: %s", error)
        failure_detail = f"SQL read-only validation failed: {error}"
        raise HTTPException(status_code=500, detail=failure_detail) from error
    return verdict
@app.post("/sql/analyze-batch", response_model=AnalyzeSqlBatchResponse)
async def sql_analyze_batch(
request: AnalyzeSqlBatchRequest,

View file

@ -34,6 +34,46 @@ class AnalyzeSqlBatchResponse(BaseModel):
results: dict[str, AnalyzeSqlBatchResult]
# Request body for read-only SQL validation.
class ValidateReadOnlySqlRequest(BaseModel):
    # Dialect name handed to sqlglot as the ``read`` argument when parsing.
    dialect: str
    # SQL text to validate.
    sql: str
# Outcome of read-only SQL validation.
class ValidateReadOnlySqlResponse(BaseModel):
    # True when the SQL was accepted as a single read-only statement.
    ok: bool
    # Reason for rejection; None when ``ok`` is True.
    error: str | None = None
# Root expression types accepted as read-only statements. Anything whose
# top-level node is not one of these is rejected outright.
_READ_ONLY_ROOT_TYPES = (exp.Select, exp.Union)

# Expression types that cause rejection wherever they appear in the parsed
# tree, even when the root itself is an accepted type.
_READ_WRITE_NODE_TYPES = (
    exp.Alter,
    exp.Analyze,
    exp.Cache,
    exp.Command,
    exp.Commit,
    exp.Copy,
    exp.Create,
    exp.Delete,
    exp.Describe,
    exp.Drop,
    exp.Execute,
    exp.Grant,
    exp.Insert,
    # ``SELECT ... INTO target FROM ...`` parses with a Select root but
    # creates/populates ``target`` in dialects such as PostgreSQL and T-SQL,
    # so the INTO clause must be denied explicitly.
    exp.Into,
    exp.Merge,
    exp.Pragma,
    exp.Refresh,
    exp.Revoke,
    exp.Rollback,
    exp.Set,
    exp.Show,
    exp.Transaction,
    exp.TruncateTable,
    exp.Uncache,
    exp.Update,
    exp.Use,
)
def _ordered_unique(values: list[str]) -> list[str]:
seen: set[str] = set()
result: list[str] = []
@ -137,6 +177,42 @@ def _analyze_payload(
return _analyze_one(item_id, sql, dialect)
def validate_read_only_sql_response(
    request: ValidateReadOnlySqlRequest,
) -> ValidateReadOnlySqlResponse:
    """Decide whether ``request.sql`` is a single read-only statement.

    The SQL is parsed with sqlglot using ``request.dialect``. It is accepted
    only if it parses to exactly one statement, that statement's root node is
    one of ``_READ_ONLY_ROOT_TYPES``, and no node in its tree is one of
    ``_READ_WRITE_NODE_TYPES``.

    Returns:
        A response with ``ok=True`` on acceptance, otherwise ``ok=False`` and
        an ``error`` message describing the first failed check.

    Raises:
        Only ``sqlglot.errors.SqlglotError`` from parsing is converted into a
        rejection; any other exception raised while parsing or walking the
        tree propagates to the caller.
    """

    def rejected(reason: str) -> ValidateReadOnlySqlResponse:
        return ValidateReadOnlySqlResponse(ok=False, error=reason)

    def write_operation(node: object) -> ValidateReadOnlySqlResponse:
        return rejected(f"SQL contains read/write operation: {type(node).__name__}")

    try:
        parsed = sqlglot.parse(request.sql, read=request.dialect)
    except sqlglot.errors.SqlglotError as exc:
        return rejected(f"Invalid expression: {exc}")

    if len(parsed) != 1:
        return rejected("Only one SQL statement can be executed.")

    root = parsed[0]
    if root is None:
        return rejected("SQL did not parse to a statement.")

    # The root must itself be a query before its subtree is inspected.
    if not isinstance(root, _READ_ONLY_ROOT_TYPES):
        return write_operation(root)

    # First denied node found during the walk, if any.
    offender = next(
        (
            candidate
            for candidate in root.walk()
            if isinstance(candidate, _READ_WRITE_NODE_TYPES)
        ),
        None,
    )
    if offender is not None:
        return write_operation(offender)

    return ValidateReadOnlySqlResponse(ok=True, error=None)
def _worker_count(request: AnalyzeSqlBatchRequest) -> int:
if len(request.items) <= 1:
return 1