Normalize semantic layer descriptions

This commit is contained in:
Luca Martial 2026-05-11 00:31:15 -07:00
parent c82989119b
commit 86c818a454
21 changed files with 498 additions and 37 deletions

View file

@ -36,6 +36,22 @@ class SourceColumnTests(BaseModel):
dbt_by_package: dict[str, list[str]] | None = None
# Description sources consulted first, from most to least authoritative.
_DEFAULT_DESCRIPTION_PRIORITY = ["user", "ai", "dbt", "db"]


def _resolve_description_map(descriptions: dict[str, str] | None) -> str | None:
    """Pick the single visible description out of a per-source description map.

    Sources named in ``_DEFAULT_DESCRIPTION_PRIORITY`` are consulted first, in
    that order; every entry of the map is then tried in the map's own
    iteration order, so sources outside the priority list still count.

    Entries that are empty or contain only whitespace are treated as missing,
    so a blank high-priority entry cannot shadow a real description supplied
    by another source.

    Args:
        descriptions: Mapping of source name to description text, or ``None``.

    Returns:
        The first usable description, returned exactly as stored (it is not
        stripped), or ``None`` when the map is missing, empty, or holds no
        usable text.
    """
    if not descriptions:
        return None

    # Prioritised sources first, then everything in the map as a fallback.
    # Re-visiting a prioritised entry in the fallback pass is harmless: it was
    # already rejected as blank the first time round.
    candidates = [descriptions.get(source) for source in _DEFAULT_DESCRIPTION_PRIORITY]
    candidates.extend(descriptions.values())

    for text in candidates:
        # `text` may be None (source absent); `strip()` rejects blank strings.
        if text and text.strip():
            return text
    return None
class FreshnessDbt(BaseModel):
raw: Any | None = None
loaded_at_field: str | None = None
@ -47,12 +63,19 @@ class SourceColumn(BaseModel):
visibility: ColumnVisibility = ColumnVisibility.PUBLIC
role: ColumnRole = ColumnRole.DEFAULT
description: str | None = None
descriptions: dict[str, str] | None = None
expr: str | None = None
natural_granularity: str | None = None
constraints: dict[str, ColumnDbtConstraints] | None = None
enum_values: dict[str, list[str]] | None = None
tests: SourceColumnTests | None = None
@model_validator(mode="after")
def resolve_description(self) -> SourceColumn:
    """Fill ``description`` from the ``descriptions`` map when it was not set.

    Returns:
        This same instance, as the "after" validator contract requires.
    """
    # An explicitly provided description always wins over the per-source map.
    if self.description is not None:
        return self
    self.description = _resolve_description_map(self.descriptions)
    return self
class JoinDeclaration(BaseModel):
to: str
@ -84,6 +107,7 @@ class DefaultTimeDimensionDbt(BaseModel):
class SourceDefinition(BaseModel):
name: str
description: str | None = None
descriptions: dict[str, str] | None = None
table: str | None = None
sql: str | None = None
grain: list[str]
@ -97,6 +121,8 @@ class SourceDefinition(BaseModel):
@model_validator(mode="after")
def validate_source(self) -> SourceDefinition:
if self.description is None:
self.description = _resolve_description_map(self.descriptions)
if self.table and self.sql:
raise ValueError("'table' and 'sql' are mutually exclusive")
if not self.grain:

View file

@ -33,6 +33,14 @@ class TestSourceColumn:
assert col.visibility == ColumnVisibility.HIDDEN
assert col.role == ColumnRole.TIME
def test_descriptions_map_resolves_visible_description(self):
    """A column given only a ``descriptions`` map exposes that text as ``description``."""
    expected = "Identifier for the related account."
    column = SourceColumn(
        name="account_id",
        type="string",
        # "ktx" is not one of the prioritised sources, so this exercises the fallback.
        descriptions={"ktx": expected},
    )
    assert column.description == expected
def test_invalid_type(self):
with pytest.raises(ValidationError):
SourceColumn(name="id", type="integer")
@ -63,6 +71,16 @@ class TestSourceDefinition:
assert src.is_sql_source
assert not src.is_table_source
def test_descriptions_map_resolves_visible_description(self):
    """A source given only a ``descriptions`` map exposes that text as ``description``."""
    expected = "Semantic-layer source for orders."
    id_column = SourceColumn(name="id", type="number")
    source = SourceDefinition(
        name="orders",
        descriptions={"ktx": expected},
        table="public.orders",
        grain=["id"],
        columns=[id_column],
    )
    assert source.description == expected
def test_table_and_sql_mutually_exclusive(self):
with pytest.raises(ValidationError, match="mutually exclusive"):
SourceDefinition(