mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-13 08:15:14 +02:00
Initial open-source release
This commit is contained in:
commit
1a42152e6f
1199 changed files with 257054 additions and 0 deletions
136
python/klo-daemon/src/klo_daemon/semantic_layer.py
Normal file
136
python/klo-daemon/src/klo_daemon/semantic_layer.py
Normal file
|
|
@ -0,0 +1,136 @@
|
|||
"""Semantic-layer compute helpers for the KLO daemon package."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
from semantic_layer.duplicate_check import validate_measure_duplicates
|
||||
from semantic_layer.engine import SemanticEngine
|
||||
from semantic_layer.models import QueryResult, SourceDefinition
|
||||
|
||||
|
||||
class SemanticLayerQueryRequest(BaseModel):
    """Input payload for ``query_semantic_layer``."""

    # Raw source definitions; each dict is unpacked into ``SourceDefinition``.
    sources: list[dict[str, Any]]
    # Query specification handed unchanged to ``SemanticEngine.query``.
    query: dict[str, Any]
    # SQL dialect forwarded to ``SemanticEngine.from_sources``.
    dialect: str = Field(default="postgres")
|
||||
|
||||
|
||||
class SemanticLayerQueryResponse(BaseModel):
    """Output payload produced by ``query_semantic_layer``."""

    # SQL text taken from the engine's query result.
    sql: str
    # Dialect reported by the engine's query result.
    dialect: str
    # JSON-mode dumps of the result columns, with names rewritten by
    # ``_response_columns``.
    columns: list[dict[str, Any]]
    # JSON-mode dump of the result's resolved plan.
    plan: dict[str, Any]
|
||||
|
||||
|
||||
class ValidateSourcesRequest(BaseModel):
    """Input payload for ``validate_semantic_layer``."""

    # Raw source definitions; each dict is unpacked into ``SourceDefinition``.
    sources: list[dict[str, Any]]
    # SQL dialect forwarded to the engine and to the measure-duplicate check.
    dialect: str = Field(default="postgres")
    # Optional names passed (as a set) to ``SemanticEngine.validate``; an empty
    # list is treated the same as ``None``.
    recently_touched: list[str] | None = Field(default=None)
|
||||
|
||||
|
||||
class ValidateSourcesResponse(BaseModel):
    """Outcome of ``validate_semantic_layer``."""

    # True only when no error messages were collected.
    valid: bool
    # Parse failures, duplicate-name findings and engine-reported errors.
    errors: list[str] = Field(default_factory=list)
    # Warnings copied from the engine's validation report.
    warnings: list[str] = Field(default_factory=list)
    # Per-source warning lists copied from the engine's validation report.
    per_source_warnings: dict[str, list[str]] = Field(default_factory=dict)
|
||||
|
||||
|
||||
def _load_sources(raw_sources: list[dict[str, Any]]) -> dict[str, SourceDefinition]:
    """Build ``SourceDefinition`` objects from raw payloads, keyed by name.

    Args:
        raw_sources: Keyword-argument dicts, one per source definition.

    Returns:
        A mapping from source name to its parsed definition, in input order.

    Raises:
        ValueError: If two payloads produce the same source name.

    Anything raised while constructing a ``SourceDefinition`` propagates
    unchanged; this helper does not catch it.
    """
    by_name: dict[str, SourceDefinition] = {}
    for payload in raw_sources:
        parsed = SourceDefinition(**payload)
        if parsed.name not in by_name:
            by_name[parsed.name] = parsed
            continue
        # Fail fast: the first repeated name aborts the whole load.
        raise ValueError(f"Duplicate source name '{parsed.name}'")
    return by_name
|
||||
|
||||
|
||||
def _validate_duplicate_measure_names(source: SourceDefinition) -> list[str]:
    """Report measure names that occur more than once on ``source``.

    Args:
        source: Definition whose ``measures`` are scanned in order.

    Returns:
        One message per repeated occurrence (the first occurrence of a name
        is never reported), in the order the repeats are encountered.
    """
    problems: list[str] = []
    known: set[str] = set()
    for entry in source.measures:
        if entry.name not in known:
            known.add(entry.name)
        else:
            problems.append(
                f"Duplicate measure '{entry.name}' on source '{source.name}'"
            )
    return problems
|
||||
|
||||
|
||||
def _response_columns(result: QueryResult) -> list[dict[str, Any]]:
    """Serialize result columns, rewriting each column's ``name`` for the response.

    Naming rules, applied per column in this order:

    * a column whose provenance value is ``"dimension"`` and which has a
      truthy ``expr`` is named after that ``expr``;
    * otherwise, a column whose name matches a plan measure that has a truthy
      ``qualified_ref`` is named after that ``qualified_ref``;
    * otherwise the dumped name is left untouched.

    Args:
        result: Query result exposing ``columns`` and ``resolved_plan.measures``.

    Returns:
        One JSON-mode ``model_dump`` dict per column, in column order.
    """
    # Lookup from measure name to qualified reference; measures without a
    # qualified reference are left out so their columns keep the dumped name.
    qualified_by_measure: dict[str, Any] = {}
    for plan_measure in result.resolved_plan.measures:
        if plan_measure.qualified_ref:
            qualified_by_measure[plan_measure.name] = plan_measure.qualified_ref

    def _serialize(col: Any) -> dict[str, Any]:
        payload = col.model_dump(mode="json")
        is_dimension = col.provenance.value == "dimension"
        if is_dimension and col.expr:
            payload["name"] = col.expr
        elif col.name in qualified_by_measure:
            payload["name"] = qualified_by_measure[col.name]
        return payload

    return [_serialize(col) for col in result.columns]
|
||||
|
||||
|
||||
def query_semantic_layer(
    request: SemanticLayerQueryRequest,
) -> SemanticLayerQueryResponse:
    """Run a semantic-layer query and package the engine's result.

    Args:
        request: Raw sources, the query specification and the target dialect.

    Returns:
        The result's SQL and dialect, its columns as rewritten by
        ``_response_columns``, and a JSON-mode dump of the resolved plan.

    Raises:
        ValueError: If two sources share a name (raised by ``_load_sources``).

    Errors raised while parsing sources, building the engine or running the
    query are not caught here.
    """
    engine = SemanticEngine.from_sources(
        _load_sources(request.sources),
        dialect=request.dialect,
    )
    outcome = engine.query(request.query)
    response_fields: dict[str, Any] = {
        "sql": outcome.sql,
        "dialect": outcome.dialect,
        "columns": _response_columns(outcome),
        "plan": outcome.resolved_plan.model_dump(mode="json"),
    }
    return SemanticLayerQueryResponse(**response_fields)
|
||||
|
||||
|
||||
def validate_semantic_layer(request: ValidateSourcesRequest) -> ValidateSourcesResponse:
    """Validate raw source definitions and collect every problem found.

    Failures are recorded as error strings instead of being raised, both when
    parsing a source and during the engine validation phase. A source that
    fails to parse, or that repeats an already accepted name, is reported and
    left out of the engine validation.

    Args:
        request: Raw sources, the target dialect and optional recently
            touched names.

    Returns:
        A response whose ``valid`` flag is true only when no errors were
        collected, together with the errors, warnings and per-source warnings.
    """
    errors: list[str] = []
    warnings: list[str] = []
    per_source_warnings: dict[str, list[str]] = {}
    accepted: dict[str, SourceDefinition] = {}

    for payload in request.sources:
        # Read the declared name before parsing so that a parse failure can
        # still be attributed to a source.
        declared_name = payload.get("name") if isinstance(payload, dict) else None
        try:
            definition = SourceDefinition(**payload)
        except Exception as exc:
            shown_name = declared_name or "<unknown>"
            errors.append(f"Source '{shown_name}' failed to parse: {exc}")
            continue

        # The first definition for a name wins; later ones are only reported.
        if definition.name in accepted:
            errors.append(f"Duplicate source name '{definition.name}'")
            continue

        accepted[definition.name] = definition
        errors.extend(_validate_duplicate_measure_names(definition))

    if accepted:
        try:
            engine = SemanticEngine.from_sources(accepted, dialect=request.dialect)
            # A missing or empty list is passed to the engine as None.
            touched = (
                set(request.recently_touched) if request.recently_touched else None
            )
            report = engine.validate(recently_touched=touched)
            errors.extend(report.errors)
            warnings.extend(report.warnings)
            per_source_warnings.update(report.per_source_warnings)
            errors.extend(validate_measure_duplicates(accepted, dialect=request.dialect))
        except Exception as exc:
            # Anything raised in this phase becomes a single error entry;
            # findings gathered before the failure are kept.
            errors.append(f"Validation failed: {exc}")

    return ValidateSourcesResponse(
        valid=not errors,
        errors=errors,
        warnings=warnings,
        per_source_warnings=per_source_warnings,
    )
|
||||
Loading…
Add table
Add a link
Reference in a new issue