ktx/python/ktx-daemon/tests/test_app.py

508 lines
15 KiB
Python
Raw Normal View History

2026-05-10 23:12:26 +02:00
from __future__ import annotations
from fastapi.testclient import TestClient
2026-05-10 23:51:24 +02:00
from ktx_daemon.app import create_app
from ktx_daemon.database_introspection import (
2026-05-10 23:12:26 +02:00
DatabaseIntrospectionResponse,
LiveDatabaseColumn,
LiveDatabaseTable,
)
# Minimal semantic-layer source definition reused by the semantic query and
# validation tests below: one table, three typed columns, no joins, and a
# single row-count measure.
ORDERS_SOURCE = {
    "name": "orders",
    "table": "public.orders",
    "grain": ["id"],
    "columns": [
        {"name": column_name, "type": column_type}
        for column_name, column_type in (
            ("id", "number"),
            ("status", "string"),
            ("amount", "number"),
        )
    ],
    "joins": [],
    "measures": [{"name": "order_count", "expr": "count(*)"}],
}
# LookML fixture fed to the /lookml/parse test: a single `orders` view over
# public.orders with two dimensions (`id`, flagged as primary key, and
# `status`) and one count measure.
LOOKML_ORDER_VIEW = """
view: orders {
sql_table_name: public.orders ;;
dimension: id {
primary_key: yes
type: number
sql: ${TABLE}.id ;;
}
dimension: status {
type: string
sql: ${TABLE}.status ;;
}
measure: order_count {
type: count
}
}
"""
class FakeEmbeddingProvider:
    """Deterministic, in-memory embedding provider used as a test double.

    Every batch passed to ``encode`` is recorded in ``calls`` so tests can
    assert on exactly what reached the provider.
    """

    name = "fake"
    dimensions = 3
    max_batch_size = 2

    def __init__(self) -> None:
        self.calls: list[list[str]] = []

    def encode(self, texts: list[str]) -> list[list[float]]:
        """Record the batch and return one three-element vector per text.

        Each vector is ``[len(text), index within the batch, 1.0]``, all
        expressed as floats.
        """
        self.calls.append(list(texts))
        vectors: list[list[float]] = []
        for position, item in enumerate(texts):
            vectors.append([float(len(item)), float(position), 1.0])
        return vectors
def test_health_endpoint_returns_healthy(monkeypatch) -> None:
    """``GET /health`` returns only a healthy status when no version is set.

    The payload is compared for exact equality, so the test must control the
    environment it runs in rather than rely on the caller's shell.
    """
    # test_health_endpoint_returns_managed_runtime_version shows that
    # KTX_DAEMON_VERSION adds a "version" key to this payload; clear any
    # ambient value (CI, a managed runtime shell) so the exact match holds.
    monkeypatch.delenv("KTX_DAEMON_VERSION", raising=False)

    client = TestClient(create_app())
    response = client.get("/health")

    assert response.status_code == 200
    assert response.json() == {"status": "healthy"}
feat: npm-managed Python runtime for @kaelio/ktx (#7) * docs: add npm managed python runtime design * build: add bundled python runtime wheel builder * build: make local embedding dependencies optional * build: bundle python runtime wheel in cli artifacts * build: track bundled python runtime release artifact * test: verify bundled python runtime wheel * docs: add plan for bundled python runtime wheel * test: cover managed python runtime lifecycle * feat: add managed python runtime installer * feat: add runtime command runner * feat: expose runtime management commands * test: verify managed python runtime commands * docs: add plan for managed python runtime installer * feat: add managed python command helper * feat: use managed runtime for sl query compute * feat: route sl query managed runtime policy * docs: add plan for managed runtime sl query integration * feat: add managed runtime daemon metadata * feat: manage python daemon lifecycle * feat: add runtime daemon start stop commands * fix: verify managed runtime daemon lifecycle * docs: add plan for managed runtime daemon lifecycle * feat: add managed local embeddings config marker * feat: add managed local embeddings daemon helper * feat: use managed runtime for local embedding setup * feat: pass managed runtime policy through setup * docs: add plan for managed local embeddings runtime * feat: read CLI package metadata dynamically * feat: assemble public kaelio ktx npm package * feat: release one public kaelio ktx npm artifact * test: cover public kaelio ktx package invocations * chore: verify public kaelio ktx package artifacts * docs: add plan for public kaelio ktx npm package * test: verify managed runtime in public package smoke * test: finalize managed runtime release smoke * docs: add plan for managed runtime release smoke * test: specify local embeddings release smoke * feat: add local embeddings runtime smoke * chore: register local embeddings smoke * fix: verify local embeddings smoke * fix: restore 
artifact smoke python env helper * docs: add plan for managed local embeddings release smoke * refactor: share managed runtime install policy parsing * feat: use managed runtime for agent semantic queries * feat: use managed runtime for MCP semantic compute * docs: add plan for managed agent and MCP semantic runtime * feat(cli): add managed daemon HTTP helpers * feat(cli): route local adapters through managed daemon * feat(cli): use managed daemon for ingest helpers * feat(cli): pass managed daemon options to scan * feat(context): pass MCP ingest pull config options * feat(cli): pass managed daemon options to serve ingest * test: verify managed local ingest daemon runtime * docs: add plan for managed local ingest daemon runtime * docs: align managed runtime examples * docs: add plan for managed runtime docs cleanup * test: cover published package runtime smoke commands * test: validate published package smoke outputs * docs: add plan for published package runtime smoke * build: stamp public npm package version * release: add npm public release policy * release: add guarded npm publish script * release: document public npm release handoff * docs: add plan for public npm release handoff * test: cover managed runtime prune in package smoke * docs: document managed runtime prune * docs: add plan for managed runtime prune smoke and docs * chore: encode uv runtime prerequisite policy * fix: clarify missing uv runtime error * docs: document uv runtime prerequisite * docs: add plan for uv runtime prerequisite contract * refactor: limit release artifacts to public package runtime * chore: align release policy with bundled runtime wheel * docs: describe single public runtime artifact surface * test: verify single public runtime artifact contract * docs: add plan for single public runtime artifact cleanup * fix: align local embeddings smoke with public version * docs: add plan for local embeddings smoke public version * release: soft-launch as @kaelio/ktx@0.1.0-rc.0 on next 
tag Publish target moves to the pre-release version 0.1.0-rc.0 under the next dist-tag so npm install @kaelio/ktx (which resolves to latest) does not pick up the soft-launch build. Users opt in via @kaelio/ktx@next. * Fix release script boundary checks * Remove PostHog from public package bundle
2026-05-11 15:50:34 +02:00
def test_health_endpoint_returns_managed_runtime_version(monkeypatch) -> None:
    """``GET /health`` echoes the version set through ``KTX_DAEMON_VERSION``."""
    monkeypatch.setenv("KTX_DAEMON_VERSION", "0.2.0")

    health = TestClient(create_app()).get("/health")

    assert health.status_code == 200
    expected_payload = {"status": "healthy", "version": "0.2.0"}
    assert health.json() == expected_payload
2026-05-10 23:12:26 +02:00
def test_database_introspect_endpoint_returns_snapshot() -> None:
    """``POST /database/introspect`` returns the injected introspector's snapshot."""
    seen_requests = []

    def recording_introspector(request):
        # Keep the request the app handed over so it can be inspected below.
        seen_requests.append(request)
        id_column = LiveDatabaseColumn(
            name="id",
            type="integer",
            nullable=False,
            primary_key=True,
        )
        orders_table = LiveDatabaseTable(
            catalog="warehouse",
            db="public",
            name="orders",
            columns=[id_column],
        )
        return DatabaseIntrospectionResponse(
            connection_id=request.connection_id,
            extracted_at="2026-04-28T10:00:00+00:00",
            metadata={"driver": request.driver, "schemas": request.schemas},
            tables=[orders_table],
        )

    payload = {
        "connection_id": "warehouse",
        "driver": "postgres",
        "url": "postgresql://readonly@example.test/warehouse",
        "schemas": ["public"],
    }
    client = TestClient(create_app(database_introspector=recording_introspector))
    response = client.post("/database/introspect", json=payload)

    assert response.status_code == 200
    snapshot = response.json()
    assert snapshot["connection_id"] == "warehouse"
    assert snapshot["tables"][0]["name"] == "orders"
    assert seen_requests[0].connection_id == "warehouse"
def test_database_introspect_endpoint_maps_value_error_to_400() -> None:
    """A ``ValueError`` from the introspector surfaces as a 400 with its message."""
    message = 'database introspection supports only driver "postgres"'

    def failing_introspector(request):
        raise ValueError(message)

    client = TestClient(create_app(database_introspector=failing_introspector))
    payload = {
        "connection_id": "warehouse",
        "driver": "snowflake",
        "url": "snowflake://example",
    }
    response = client.post("/database/introspect", json=payload)

    assert response.status_code == 400
    assert response.json() == {"detail": message}
def test_embedding_compute_endpoint_returns_embedding() -> None:
    """``POST /embeddings/compute`` embeds one text via the injected provider."""
    fake_provider = FakeEmbeddingProvider()
    app = create_app(embedding_provider=fake_provider)

    response = TestClient(app).post("/embeddings/compute", json={"text": "hello"})

    assert response.status_code == 200
    # The fake encodes "hello" as [len("hello"), index 0, 1.0].
    assert response.json() == {"embedding": [5.0, 0.0, 1.0]}
    assert fake_provider.calls == [["hello"]]
def test_embedding_compute_bulk_endpoint_returns_embeddings() -> None:
    """``POST /embeddings/compute-bulk`` embeds a batch in a single provider call."""
    fake_provider = FakeEmbeddingProvider()
    app = create_app(embedding_provider=fake_provider)
    batch = ["one", "three"]

    response = TestClient(app).post("/embeddings/compute-bulk", json={"texts": batch})

    assert response.status_code == 200
    expected_vectors = [[3.0, 0.0, 1.0], [5.0, 1.0, 1.0]]
    assert response.json() == {"embeddings": expected_vectors}
    assert fake_provider.calls == [["one", "three"]]
def test_embedding_compute_bulk_endpoint_maps_value_error_to_400() -> None:
    """A batch larger than the provider's ``max_batch_size`` is rejected with 400."""
    fake_provider = FakeEmbeddingProvider()
    app = create_app(embedding_provider=fake_provider)
    # Three texts against the fake's max_batch_size of 2.
    oversized_batch = ["one", "two", "three"]

    response = TestClient(app).post(
        "/embeddings/compute-bulk",
        json={"texts": oversized_batch},
    )

    assert response.status_code == 400
    assert response.json() == {"detail": "Maximum 2 texts allowed per batch"}
    # The provider must not have been invoked for the rejected batch.
    assert fake_provider.calls == []
def test_code_execute_endpoint_is_not_registered_by_default() -> None:
    """Without opting in, ``POST /code/execute`` answers 404."""
    default_app = create_app()
    request_body = {"code": "result = 7"}

    response = TestClient(default_app).post("/code/execute", json=request_body)

    assert response.status_code == 404
def test_code_execute_endpoint_returns_result_when_enabled() -> None:
    """With code execution enabled, the endpoint returns result and console output."""
    snippet = 'print("ran")\nresult = {"value": 7}'
    app = create_app(enable_code_execution=True)

    response = TestClient(app).post("/code/execute", json={"code": snippet})

    assert response.status_code == 200
    payload = response.json()
    assert payload["result"] == {"value": 7}
    assert payload["console_output"] == "ran\n"
    assert payload["error"] is None
    assert payload["message"] is None
    assert payload["visualizations"] is None
    formatted = payload["formatted_result"]
    assert "=== Console Output ===" in formatted
    assert "=== Result ===" in formatted
def test_code_execute_endpoint_serializes_numpy_result_when_enabled() -> None:
    """A ``numpy.float64`` inside the result comes back as a plain JSON number."""
    snippet = "import numpy as np\nresult = {'value': np.float64(1.25)}"
    app = create_app(enable_code_execution=True)

    response = TestClient(app).post("/code/execute", json={"code": snippet})

    assert response.status_code == 200
    payload = response.json()
    assert payload["result"] == {"value": 1.25}
    assert payload["error"] is None
def test_code_execute_endpoint_uses_host_free_boundary_when_enabled() -> None:
    """Scratchpad helpers report missing host settings despite a sent auth header.

    The request carries an ``Authorization`` header and a ``source_id``, yet
    the scratchpad call must still fail with the "required" error below.
    """
    snippet = (
        "import pandas as pd\n"
        "result = save_df_to_scratchpad(pd.DataFrame({'value': [1]}), 'out.json')"
    )
    request_body = {
        "source_id": "chat_123",
        "message_id": "message_456",
        "code": snippet,
    }
    auth_headers = {"Authorization": "Bearer should-not-forward"}
    app = create_app(enable_code_execution=True)

    response = TestClient(app).post(
        "/code/execute",
        json=request_body,
        headers=auth_headers,
    )

    assert response.status_code == 200
    payload = response.json()
    assert payload["result"] is None
    expected_error = (
        "nest_api_url, Authorization header, and source_id are required for scratchpad operations"
    )
    assert payload["error"] == expected_error
    assert "=== Error ===" in payload["formatted_result"]
def test_sql_parse_table_identifier_endpoint() -> None:
    """``POST /sql/parse-table-identifier`` returns one keyed result per item.

    A plain ``schema.table`` name parses; an unresolved Looker template is
    reported as a failure with a reason code.
    """
    plain_item = {
        "key": "orders",
        "sql_table_name": "public.orders",
        "dialect": "postgres",
    }
    template_item = {
        "key": "template",
        "sql_table_name": "${orders.SQL_TABLE_NAME}",
        "dialect": "postgres",
    }

    response = TestClient(create_app()).post(
        "/sql/parse-table-identifier",
        json={"items": [plain_item, template_item]},
    )

    assert response.status_code == 200
    results = response.json()["results"]

    parsed = results["orders"]
    assert parsed["ok"] is True
    assert parsed["schema"] == "public"
    assert parsed["name"] == "orders"

    unresolved = results["template"]
    assert unresolved["ok"] is False
    assert unresolved["reason"] == "looker_template_unresolved"
feat(mcp):added MCP server (#97) * docs(specs): design research-agent MCP tools and ktx mcp daemon Adds the 2026-05-14 design spec for exposing four new MCP tools (discover_data, entity_details, dictionary_search, sql_execution), shipping a ktx-research skill, and introducing an HTTP-only ktx mcp daemon so external agents can use KTX as a research-capable context layer. * Refine research-agent MCP tools spec after adversarial review iteration 1 * Refine research-agent MCP tools spec after adversarial review iteration 2 * Refine research-agent MCP tools spec after adversarial review iteration 3 * Refine spec: drop connectionName compat carve-out and ground summary/snippet provenance per kind * feat(daemon): validate read-only SQL with sqlglot * feat(context): expose read-only SQL validation port * feat(context): register MCP sql execution tool * feat(context): execute MCP SQL through validated connector path * test(context): update SQL analysis port fixtures * docs: add research-agent MCP sql execution foundation plan * feat(context): add scan-backed entity details service * feat(context): register MCP entity details tool * feat(context): expose local MCP entity details * test(context): align entity details scan fixtures * docs: add research-agent MCP entity_details plan * feat(context): add dictionary search service * feat(context): register MCP dictionary search tool * feat(context): expose local MCP dictionary search * docs: add research-agent MCP dictionary_search plan * feat: add MCP discover data service * feat: expose discover data MCP tool * feat: wire local discover data MCP port * docs: add research-agent MCP discover_data plan * feat(cli): add mcp http security helpers * feat(cli): host mcp over streamable http * feat(cli): manage mcp daemon lifecycle * feat(cli): add ktx mcp commands * fix(cli): stabilize mcp daemon verification * docs: add research-agent MCP http daemon plan * feat(cli): install KTX research skill * feat(cli): configure MCP clients in 
setup agents * feat(cli): support Claude local MCP setup scope * docs: add research-agent MCP setup-agents plan * refactor(context): use connectionId in warehouse verification tools * docs(context): update ingest verification prompts for connectionId * docs: add research-agent MCP ingest contract convergence plan * chore: build runtime artifacts in conductor setup --------- Co-authored-by: Andrey Avtomonov <7889985+andreybavt@users.noreply.github.com>
2026-05-15 02:35:09 +02:00
def test_sql_validate_read_only_endpoint() -> None:
    """``POST /sql/validate-read-only`` accepts a SELECT and flags a nested INSERT.

    Both requests answer 200; the verdict is carried in the ``ok`` and
    ``error`` fields of the body.
    """
    endpoint = "/sql/validate-read-only"
    read_only_query = {"dialect": "postgres", "sql": "select * from public.orders"}
    writing_query = {
        "dialect": "postgres",
        "sql": "with x as (insert into audit.events values (1) returning *) select * from x",
    }
    client = TestClient(create_app())

    # Issue both requests before asserting, in the same order as always.
    accepted = client.post(endpoint, json=read_only_query)
    rejected = client.post(endpoint, json=writing_query)

    assert accepted.status_code == 200
    assert accepted.json() == {"ok": True, "error": None}
    assert rejected.status_code == 200
    assert rejected.json() == {
        "ok": False,
        "error": "SQL contains read/write operation: Insert",
    }
2026-05-11 16:56:50 +02:00
def test_sql_analyze_batch_endpoint_returns_per_item_results() -> None:
    """``POST /sql/analyze-batch`` reports each item separately, keyed by its id.

    One parseable statement and one broken statement are sent together; the
    broken one must carry an error without failing the whole batch.
    """
    valid_item = {
        "id": "orders",
        "sql": "select status from public.orders where created_at is not null",
    }
    broken_item = {"id": "broken", "sql": "select * from where"}
    request_body = {
        "dialect": "postgres",
        "max_workers": 1,
        "items": [valid_item, broken_item],
    }

    response = TestClient(create_app()).post("/sql/analyze-batch", json=request_body)

    assert response.status_code == 200
    results = response.json()["results"]

    analyzed = results["orders"]
    assert analyzed["tables_touched"] == ["public.orders"]
    assert analyzed["columns_by_clause"] == {
        "select": ["status"],
        "where": ["created_at"],
    }
    assert analyzed["error"] is None

    failed = results["broken"]
    assert failed["tables_touched"] == []
    assert failed["columns_by_clause"] == {}
    assert failed["error"] is not None
2026-05-10 23:12:26 +02:00
def test_semantic_query_endpoint_returns_sql() -> None:
    """``POST /semantic-layer/query`` compiles a measure/dimension query to SQL."""
    semantic_query = {
        "measures": ["orders.order_count"],
        "dimensions": ["orders.status"],
    }
    request_body = {
        "sources": [ORDERS_SOURCE],
        "dialect": "postgres",
        "query": semantic_query,
    }

    response = TestClient(create_app()).post(
        "/semantic-layer/query",
        json=request_body,
    )

    assert response.status_code == 200
    compiled = response.json()
    assert compiled["dialect"] == "postgres"
    assert "public.orders" in compiled["sql"]
    first_column = compiled["columns"][0]
    assert first_column["name"] == "orders.status"
def test_semantic_query_endpoint_maps_value_error_to_400() -> None:
    """Querying a measure on an unknown source yields a 400 naming that measure."""
    unknown_measure = "missing.order_count"
    request_body = {
        "sources": [ORDERS_SOURCE],
        "dialect": "postgres",
        "query": {
            "measures": [unknown_measure],
            "dimensions": [],
        },
    }

    response = TestClient(create_app()).post(
        "/semantic-layer/query",
        json=request_body,
    )

    assert response.status_code == 400
    detail = response.json()["detail"]
    assert unknown_measure in detail
def test_semantic_validate_endpoint_returns_structured_validation() -> None:
    """``POST /semantic-layer/validate`` reports a duplicate measure as an error.

    The response is still 200; validity, errors and warnings are returned as
    structured fields.
    """
    revenue_measure = {"name": "revenue", "expr": "sum(amount)"}
    # Same measure listed twice; separate dict objects, identical content.
    source_with_duplicate = {
        **ORDERS_SOURCE,
        "measures": [dict(revenue_measure), dict(revenue_measure)],
    }
    request_body = {"sources": [source_with_duplicate], "dialect": "postgres"}

    response = TestClient(create_app()).post(
        "/semantic-layer/validate",
        json=request_body,
    )

    assert response.status_code == 200
    report = response.json()
    assert report["valid"] is False
    assert any("Duplicate measure" in error for error in report["errors"])
    assert report["warnings"] == []
    assert report["per_source_warnings"] == {}
def test_semantic_generate_sources_endpoint_returns_sources() -> None:
    """``POST /semantic-layer/generate-sources`` builds sources from tables and links.

    Two tables and one many-to-one link are submitted; the generated
    ``orders`` source is checked for its table reference, description,
    grain, join and measure names.
    """
    orders_table = {
        "name": "orders",
        "db": "public",
        "comment": "Orders table",
        "columns": [
            {
                "name": "id",
                "type": "integer",
                "primary_key": True,
                "nullable": False,
                "comment": "Order ID",
            },
            {"name": "customer_id", "type": "integer"},
            {
                "name": "amount",
                "type": "decimal",
                "comment": "Order amount",
            },
        ],
    }
    customers_table = {
        "name": "customers",
        "db": "public",
        "columns": [
            {"name": "id", "type": "integer", "primary_key": True},
            {"name": "email", "type": "varchar"},
        ],
    }
    orders_to_customers = {
        "from_table": "orders",
        "from_column": "customer_id",
        "to_table": "customers",
        "to_column": "id",
        "relationship_type": "MANY_TO_ONE",
    }
    request_body = {
        "tables": [orders_table, customers_table],
        "links": [orders_to_customers],
        "dialect": "postgres",
    }

    response = TestClient(create_app()).post(
        "/semantic-layer/generate-sources",
        json=request_body,
    )

    assert response.status_code == 200
    generated = response.json()
    assert generated["source_count"] == 2

    by_name = {source["name"]: source for source in generated["sources"]}
    orders = by_name["orders"]
    assert orders["table"] == "public.orders"
    assert orders["description"] == "Orders table"
    assert orders["grain"] == ["id"]
    assert orders["joins"] == [
        {
            "to": "customers",
            "on": "customer_id = customers.id",
            "relationship": "many_to_one",
        }
    ]
    measure_names = [measure["name"] for measure in orders["measures"]]
    assert measure_names == [
        "record_count",
        "total_amount",
        "avg_amount",
    ]
def test_lookml_parse_endpoint_returns_resolved_views() -> None:
    """``POST /lookml/parse`` resolves the fixture view into a table-backed view.

    The single view in ``LOOKML_ORDER_VIEW`` must come back with no joins,
    skipped views or warnings, and with its grain, columns and measure set.
    """
    view_file = {
        "path": "views/orders.view.lkml",
        "content": LOOKML_ORDER_VIEW,
    }
    request_body = {"files": [view_file], "dialect": "postgres"}

    response = TestClient(create_app()).post("/lookml/parse", json=request_body)

    assert response.status_code == 200
    parsed = response.json()
    assert parsed["joins"] == []
    assert parsed["skipped_views"] == []
    assert parsed["warnings"] == []
    assert len(parsed["views"]) == 1

    orders_view = parsed["views"][0]
    assert orders_view["name"] == "orders"
    assert orders_view["source_type"] == "table"
    assert orders_view["table_ref"] == "public.orders"
    assert orders_view["grain"] == ["id"]
    column_names = [column["name"] for column in orders_view["columns"]]
    assert column_names == ["id", "status"]
    expected_measure = {
        "name": "order_count",
        "expr": "count(*)",
        "filter": None,
        "description": None,
    }
    assert orders_view["measures"] == [expected_measure]