mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-07 07:55:13 +02:00
feat(mcp):added MCP server (#97)
* docs(specs): design research-agent MCP tools and ktx mcp daemon Adds the 2026-05-14 design spec for exposing four new MCP tools (discover_data, entity_details, dictionary_search, sql_execution), shipping a ktx-research skill, and introducing an HTTP-only ktx mcp daemon so external agents can use KTX as a research-capable context layer. * Refine research-agent MCP tools spec after adversarial review iteration 1 * Refine research-agent MCP tools spec after adversarial review iteration 2 * Refine research-agent MCP tools spec after adversarial review iteration 3 * Refine spec: drop connectionName compat carve-out and ground summary/snippet provenance per kind * feat(daemon): validate read-only SQL with sqlglot * feat(context): expose read-only SQL validation port * feat(context): register MCP sql execution tool * feat(context): execute MCP SQL through validated connector path * test(context): update SQL analysis port fixtures * docs: add research-agent MCP sql execution foundation plan * feat(context): add scan-backed entity details service * feat(context): register MCP entity details tool * feat(context): expose local MCP entity details * test(context): align entity details scan fixtures * docs: add research-agent MCP entity_details plan * feat(context): add dictionary search service * feat(context): register MCP dictionary search tool * feat(context): expose local MCP dictionary search * docs: add research-agent MCP dictionary_search plan * feat: add MCP discover data service * feat: expose discover data MCP tool * feat: wire local discover data MCP port * docs: add research-agent MCP discover_data plan * feat(cli): add mcp http security helpers * feat(cli): host mcp over streamable http * feat(cli): manage mcp daemon lifecycle * feat(cli): add ktx mcp commands * fix(cli): stabilize mcp daemon verification * docs: add research-agent MCP http daemon plan * feat(cli): install KTX research skill * feat(cli): configure MCP clients in setup agents * feat(cli): support Claude local MCP setup scope * docs: add research-agent MCP setup-agents plan * refactor(context): use connectionId in warehouse verification tools * docs(context): update ingest verification prompts for connectionId * docs: add research-agent MCP ingest contract convergence plan * chore: build runtime artifacts in conductor setup --------- Co-authored-by: Andrey Avtomonov <7889985+andreybavt@users.noreply.github.com>
This commit is contained in:
parent
c7b64379bf
commit
b759a4a286
78 changed files with 13689 additions and 190 deletions
|
|
@ -51,7 +51,10 @@ from ktx_daemon.source_generation import (
|
|||
from ktx_daemon.sql_analysis import (
|
||||
AnalyzeSqlBatchRequest,
|
||||
AnalyzeSqlBatchResponse,
|
||||
ValidateReadOnlySqlRequest,
|
||||
ValidateReadOnlySqlResponse,
|
||||
analyze_sql_batch_response,
|
||||
validate_read_only_sql_response,
|
||||
)
|
||||
from ktx_daemon.table_identifier import (
|
||||
ParseTableIdentifierBatchRequest,
|
||||
|
|
@ -198,6 +201,19 @@ def create_app(
|
|||
detail=f"Table identifier parsing failed: {error}",
|
||||
) from error
|
||||
|
||||
@app.post("/sql/validate-read-only", response_model=ValidateReadOnlySqlResponse)
|
||||
async def sql_validate_read_only(
|
||||
request: ValidateReadOnlySqlRequest,
|
||||
) -> ValidateReadOnlySqlResponse:
|
||||
try:
|
||||
return validate_read_only_sql_response(request)
|
||||
except Exception as error:
|
||||
logger.exception("SQL read-only validation failed: %s", error)
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"SQL read-only validation failed: {error}",
|
||||
) from error
|
||||
|
||||
@app.post("/sql/analyze-batch", response_model=AnalyzeSqlBatchResponse)
|
||||
async def sql_analyze_batch(
|
||||
request: AnalyzeSqlBatchRequest,
|
||||
|
|
|
|||
|
|
@ -34,6 +34,46 @@ class AnalyzeSqlBatchResponse(BaseModel):
|
|||
results: dict[str, AnalyzeSqlBatchResult]
|
||||
|
||||
|
||||
class ValidateReadOnlySqlRequest(BaseModel):
|
||||
dialect: str
|
||||
sql: str
|
||||
|
||||
|
||||
class ValidateReadOnlySqlResponse(BaseModel):
|
||||
ok: bool
|
||||
error: str | None = None
|
||||
|
||||
|
||||
_READ_ONLY_ROOT_TYPES = (exp.Select, exp.Union)
|
||||
_READ_WRITE_NODE_TYPES = (
|
||||
exp.Alter,
|
||||
exp.Analyze,
|
||||
exp.Cache,
|
||||
exp.Command,
|
||||
exp.Commit,
|
||||
exp.Copy,
|
||||
exp.Create,
|
||||
exp.Delete,
|
||||
exp.Describe,
|
||||
exp.Drop,
|
||||
exp.Execute,
|
||||
exp.Grant,
|
||||
exp.Insert,
|
||||
exp.Merge,
|
||||
exp.Pragma,
|
||||
exp.Refresh,
|
||||
exp.Revoke,
|
||||
exp.Rollback,
|
||||
exp.Set,
|
||||
exp.Show,
|
||||
exp.Transaction,
|
||||
exp.TruncateTable,
|
||||
exp.Uncache,
|
||||
exp.Update,
|
||||
exp.Use,
|
||||
)
|
||||
|
||||
|
||||
def _ordered_unique(values: list[str]) -> list[str]:
|
||||
seen: set[str] = set()
|
||||
result: list[str] = []
|
||||
|
|
@ -137,6 +177,42 @@ def _analyze_payload(
|
|||
return _analyze_one(item_id, sql, dialect)
|
||||
|
||||
|
||||
def validate_read_only_sql_response(
|
||||
request: ValidateReadOnlySqlRequest,
|
||||
) -> ValidateReadOnlySqlResponse:
|
||||
try:
|
||||
statements = sqlglot.parse(request.sql, read=request.dialect)
|
||||
except sqlglot.errors.SqlglotError as exc:
|
||||
return ValidateReadOnlySqlResponse(ok=False, error=f"Invalid expression: {exc}")
|
||||
|
||||
if len(statements) != 1:
|
||||
return ValidateReadOnlySqlResponse(
|
||||
ok=False,
|
||||
error="Only one SQL statement can be executed.",
|
||||
)
|
||||
|
||||
tree = statements[0]
|
||||
if tree is None:
|
||||
return ValidateReadOnlySqlResponse(
|
||||
ok=False,
|
||||
error="SQL did not parse to a statement.",
|
||||
)
|
||||
if not isinstance(tree, _READ_ONLY_ROOT_TYPES):
|
||||
return ValidateReadOnlySqlResponse(
|
||||
ok=False,
|
||||
error=f"SQL contains read/write operation: {type(tree).__name__}",
|
||||
)
|
||||
|
||||
for node in tree.walk():
|
||||
if isinstance(node, _READ_WRITE_NODE_TYPES):
|
||||
return ValidateReadOnlySqlResponse(
|
||||
ok=False,
|
||||
error=f"SQL contains read/write operation: {type(node).__name__}",
|
||||
)
|
||||
|
||||
return ValidateReadOnlySqlResponse(ok=True, error=None)
|
||||
|
||||
|
||||
def _worker_count(request: AnalyzeSqlBatchRequest) -> int:
|
||||
if len(request.items) <= 1:
|
||||
return 1
|
||||
|
|
|
|||
|
|
@ -280,6 +280,30 @@ def test_sql_parse_table_identifier_endpoint() -> None:
|
|||
assert body["results"]["template"]["reason"] == "looker_template_unresolved"
|
||||
|
||||
|
||||
def test_sql_validate_read_only_endpoint() -> None:
|
||||
client = TestClient(create_app())
|
||||
|
||||
ok_response = client.post(
|
||||
"/sql/validate-read-only",
|
||||
json={"dialect": "postgres", "sql": "select * from public.orders"},
|
||||
)
|
||||
bad_response = client.post(
|
||||
"/sql/validate-read-only",
|
||||
json={
|
||||
"dialect": "postgres",
|
||||
"sql": "with x as (insert into audit.events values (1) returning *) select * from x",
|
||||
},
|
||||
)
|
||||
|
||||
assert ok_response.status_code == 200
|
||||
assert ok_response.json() == {"ok": True, "error": None}
|
||||
assert bad_response.status_code == 200
|
||||
assert bad_response.json() == {
|
||||
"ok": False,
|
||||
"error": "SQL contains read/write operation: Insert",
|
||||
}
|
||||
|
||||
|
||||
def test_sql_analyze_batch_endpoint_returns_per_item_results() -> None:
|
||||
client = TestClient(create_app())
|
||||
|
||||
|
|
|
|||
|
|
@ -3,8 +3,10 @@ from __future__ import annotations
|
|||
from ktx_daemon.sql_analysis import (
|
||||
AnalyzeSqlBatchItem,
|
||||
AnalyzeSqlBatchRequest,
|
||||
ValidateReadOnlySqlRequest,
|
||||
_columns_from_nodes,
|
||||
analyze_sql_batch_response,
|
||||
validate_read_only_sql_response,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -56,3 +58,74 @@ def test_analyze_sql_batch_returns_per_item_parse_errors() -> None:
|
|||
|
||||
def test_columns_from_nodes_ignores_non_expression_clause_values() -> None:
|
||||
assert _columns_from_nodes([True, False, None]) == []
|
||||
|
||||
|
||||
def test_validate_read_only_sql_accepts_select_and_with_queries() -> None:
|
||||
select_response = validate_read_only_sql_response(
|
||||
ValidateReadOnlySqlRequest(
|
||||
dialect="postgres",
|
||||
sql="select id, status from public.orders where status = 'paid'",
|
||||
)
|
||||
)
|
||||
with_response = validate_read_only_sql_response(
|
||||
ValidateReadOnlySqlRequest(
|
||||
dialect="postgres",
|
||||
sql=(
|
||||
"with paid as (select * from public.orders where status = 'paid') "
|
||||
"select count(*) from paid"
|
||||
),
|
||||
)
|
||||
)
|
||||
|
||||
assert select_response.ok is True
|
||||
assert select_response.error is None
|
||||
assert with_response.ok is True
|
||||
assert with_response.error is None
|
||||
|
||||
|
||||
def test_validate_read_only_sql_rejects_cte_dml() -> None:
|
||||
response = validate_read_only_sql_response(
|
||||
ValidateReadOnlySqlRequest(
|
||||
dialect="postgres",
|
||||
sql="with x as (insert into audit.events values (1) returning *) select * from x",
|
||||
)
|
||||
)
|
||||
|
||||
assert response.ok is False
|
||||
assert response.error == "SQL contains read/write operation: Insert"
|
||||
|
||||
|
||||
def test_validate_read_only_sql_rejects_multi_statement_payloads() -> None:
|
||||
response = validate_read_only_sql_response(
|
||||
ValidateReadOnlySqlRequest(
|
||||
dialect="postgres",
|
||||
sql="select * from public.orders; delete from public.orders",
|
||||
)
|
||||
)
|
||||
|
||||
assert response.ok is False
|
||||
assert response.error == "Only one SQL statement can be executed."
|
||||
|
||||
|
||||
def test_validate_read_only_sql_rejects_commands_and_pragmas() -> None:
|
||||
command_response = validate_read_only_sql_response(
|
||||
ValidateReadOnlySqlRequest(dialect="postgres", sql="call refresh_stats()")
|
||||
)
|
||||
pragma_response = validate_read_only_sql_response(
|
||||
ValidateReadOnlySqlRequest(dialect="sqlite", sql="pragma table_info(users)")
|
||||
)
|
||||
|
||||
assert command_response.ok is False
|
||||
assert command_response.error == "SQL contains read/write operation: Command"
|
||||
assert pragma_response.ok is False
|
||||
assert pragma_response.error == "SQL contains read/write operation: Pragma"
|
||||
|
||||
|
||||
def test_validate_read_only_sql_reports_parse_errors() -> None:
|
||||
response = validate_read_only_sql_response(
|
||||
ValidateReadOnlySqlRequest(dialect="postgres", sql="select * from where")
|
||||
)
|
||||
|
||||
assert response.ok is False
|
||||
assert response.error is not None
|
||||
assert "Invalid expression" in response.error
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue