feat(mcp): added MCP server (#97)

* docs(specs): design research-agent MCP tools and ktx mcp daemon Adds the 2026-05-14 design spec for exposing four new MCP tools (discover_data, entity_details, dictionary_search, sql_execution), shipping a ktx-research skill, and introducing an HTTP-only ktx mcp daemon so external agents can use KTX as a research-capable context layer. * Refine research-agent MCP tools spec after adversarial review iteration 1 * Refine research-agent MCP tools spec after adversarial review iteration 2 * Refine research-agent MCP tools spec after adversarial review iteration 3 * Refine spec: drop connectionName compat carve-out and ground summary/snippet provenance per kind * feat(daemon): validate read-only SQL with sqlglot * feat(context): expose read-only SQL validation port * feat(context): register MCP sql execution tool * feat(context): execute MCP SQL through validated connector path * test(context): update SQL analysis port fixtures * docs: add research-agent MCP sql execution foundation plan * feat(context): add scan-backed entity details service * feat(context): register MCP entity details tool * feat(context): expose local MCP entity details * test(context): align entity details scan fixtures * docs: add research-agent MCP entity_details plan * feat(context): add dictionary search service * feat(context): register MCP dictionary search tool * feat(context): expose local MCP dictionary search * docs: add research-agent MCP dictionary_search plan * feat: add MCP discover data service * feat: expose discover data MCP tool * feat: wire local discover data MCP port * docs: add research-agent MCP discover_data plan * feat(cli): add mcp http security helpers * feat(cli): host mcp over streamable http * feat(cli): manage mcp daemon lifecycle * feat(cli): add ktx mcp commands * fix(cli): stabilize mcp daemon verification * docs: add research-agent MCP http daemon plan * feat(cli): install KTX research skill * feat(cli): configure MCP clients in setup agents * feat(cli): support 
Claude local MCP setup scope * docs: add research-agent MCP setup-agents plan * refactor(context): use connectionId in warehouse verification tools * docs(context): update ingest verification prompts for connectionId * docs: add research-agent MCP ingest contract convergence plan * chore: build runtime artifacts in conductor setup --------- Co-authored-by: Andrey Avtomonov <7889985+andreybavt@users.noreply.github.com>
2026-06-10 08:05:14 +02:00 · 2026-05-15 02:35:09 +02:00 · 2026-05-15 02:35:09 +02:00 · b759a4a286
commit b759a4a286
parent c7b64379bf
78 changed files with 13689 additions and 190 deletions
--- a/python/ktx-daemon/src/ktx_daemon/app.py
+++ b/python/ktx-daemon/src/ktx_daemon/app.py
@ -51,7 +51,10 @@ from ktx_daemon.source_generation import (
 from ktx_daemon.sql_analysis import (
    AnalyzeSqlBatchRequest,
    AnalyzeSqlBatchResponse,
+    ValidateReadOnlySqlRequest,
+    ValidateReadOnlySqlResponse,
    analyze_sql_batch_response,
+    validate_read_only_sql_response,
 )
 from ktx_daemon.table_identifier import (
    ParseTableIdentifierBatchRequest,
@ -198,6 +201,19 @@ def create_app(
                detail=f"Table identifier parsing failed: {error}",
            ) from error

+    @app.post("/sql/validate-read-only", response_model=ValidateReadOnlySqlResponse)
+    async def sql_validate_read_only(
+        request: ValidateReadOnlySqlRequest,
+    ) -> ValidateReadOnlySqlResponse:
+        """Handle POST /sql/validate-read-only.
+
+        Delegates to ``validate_read_only_sql_response`` and returns its result
+        unchanged.
+
+        Raises:
+            HTTPException: with status 500 when the validator itself raises.
+                The original exception is logged with its traceback and
+                chained as the cause.
+        """
+        try:
+            return validate_read_only_sql_response(request)
+        except Exception as error:
+            # Boundary handler: log the traceback, then surface the failure
+            # to the client as an HTTP 500 instead of an unhandled error.
+            logger.exception("SQL read-only validation failed: %s", error)
+            raise HTTPException(
+                status_code=500,
+                detail=f"SQL read-only validation failed: {error}",
+            ) from error
+
    @app.post("/sql/analyze-batch", response_model=AnalyzeSqlBatchResponse)
    async def sql_analyze_batch(
        request: AnalyzeSqlBatchRequest,
--- a/python/ktx-daemon/src/ktx_daemon/sql_analysis.py
+++ b/python/ktx-daemon/src/ktx_daemon/sql_analysis.py
@ -34,6 +34,46 @@ class AnalyzeSqlBatchResponse(BaseModel):
    results: dict[str, AnalyzeSqlBatchResult]


+class ValidateReadOnlySqlRequest(BaseModel):
+    """Request payload for read-only SQL validation."""
+
+    # Dialect name handed to sqlglot as ``read=`` when parsing ``sql``.
+    dialect: str
+    # SQL text to validate.
+    sql: str
+
+
+class ValidateReadOnlySqlResponse(BaseModel):
+    """Outcome of read-only SQL validation."""
+
+    # True when the SQL was accepted as a single read-only statement.
+    ok: bool
+    # Rejection reason; the validator sets it whenever ``ok`` is False and
+    # leaves it as None on success.
+    error: str | None = None
+
+
+# Expression types allowed as the root of a validated statement; any other
+# root type is rejected by validate_read_only_sql_response.
+# NOTE(review): depending on the sqlglot version, exp.Intersect / exp.Except
+# may or may not be subclasses of exp.Union — confirm whether INTERSECT and
+# EXCEPT queries are meant to pass this root check.
+_READ_ONLY_ROOT_TYPES = (exp.Select, exp.Union)
+# Expression types that cause rejection when found anywhere in the parsed
+# tree (the validator walks every node, not only the root).
+# NOTE(review): exp.Into (SELECT ... INTO new_table) and row-locking clauses
+# such as FOR UPDATE are not listed here — confirm whether they should be
+# treated as read/write operations.
+_READ_WRITE_NODE_TYPES = (
+    exp.Alter,
+    exp.Analyze,
+    exp.Cache,
+    exp.Command,
+    exp.Commit,
+    exp.Copy,
+    exp.Create,
+    exp.Delete,
+    exp.Describe,
+    exp.Drop,
+    exp.Execute,
+    exp.Grant,
+    exp.Insert,
+    exp.Merge,
+    exp.Pragma,
+    exp.Refresh,
+    exp.Revoke,
+    exp.Rollback,
+    exp.Set,
+    exp.Show,
+    exp.Transaction,
+    exp.TruncateTable,
+    exp.Uncache,
+    exp.Update,
+    exp.Use,
+)
+
+
 def _ordered_unique(values: list[str]) -> list[str]:
    seen: set[str] = set()
    result: list[str] = []
@ -137,6 +177,42 @@ def _analyze_payload(
    return _analyze_one(item_id, sql, dialect)


+def validate_read_only_sql_response(
+    request: ValidateReadOnlySqlRequest,
+) -> ValidateReadOnlySqlResponse:
+    """Check that ``request.sql`` is a single read-only statement.
+
+    The SQL is parsed with sqlglot using ``request.dialect``. It is accepted
+    only when it parses to exactly one statement whose root is one of
+    ``_READ_ONLY_ROOT_TYPES`` and whose tree contains no node listed in
+    ``_READ_WRITE_NODE_TYPES``.
+
+    Args:
+        request: The dialect and SQL text to validate.
+
+    Returns:
+        A response with ``ok=True`` and no error when the SQL is accepted,
+        otherwise ``ok=False`` with a message describing the rejection.
+        sqlglot parse errors are reported this way rather than raised.
+    """
+    try:
+        statements = sqlglot.parse(request.sql, read=request.dialect)
+    except sqlglot.errors.SqlglotError as exc:
+        return ValidateReadOnlySqlResponse(ok=False, error=f"Invalid expression: {exc}")
+
+    # Reject multi-statement input (and input that yields no statements).
+    if len(statements) != 1:
+        return ValidateReadOnlySqlResponse(
+            ok=False,
+            error="Only one SQL statement can be executed.",
+        )
+
+    tree = statements[0]
+    # The parsed list may hold None in place of a statement; treat that as
+    # a rejection rather than dereferencing it below.
+    if tree is None:
+        return ValidateReadOnlySqlResponse(
+            ok=False,
+            error="SQL did not parse to a statement.",
+        )
+    # Allow-list on the root: only the listed query root types may proceed.
+    if not isinstance(tree, _READ_ONLY_ROOT_TYPES):
+        return ValidateReadOnlySqlResponse(
+            ok=False,
+            error=f"SQL contains read/write operation: {type(tree).__name__}",
+        )
+
+    # Deny-list on every node: an allowed root can still contain a
+    # disallowed expression somewhere inside it.
+    # NOTE(review): SELECT ... INTO presumably parses as a Select carrying an
+    # exp.Into child, which is not in the deny list — confirm whether it
+    # should be rejected here.
+    for node in tree.walk():
+        if isinstance(node, _READ_WRITE_NODE_TYPES):
+            return ValidateReadOnlySqlResponse(
+                ok=False,
+                error=f"SQL contains read/write operation: {type(node).__name__}",
+            )
+
+    return ValidateReadOnlySqlResponse(ok=True, error=None)
+
+
 def _worker_count(request: AnalyzeSqlBatchRequest) -> int:
    if len(request.items) <= 1:
        return 1