refactor: remove legacy compatibility paths

This commit is contained in:
Andrey Avtomonov 2026-05-13 14:37:05 +02:00
parent c22248dabf
commit a517c834fe
83 changed files with 239 additions and 534 deletions

View file

@ -59,9 +59,7 @@ class SourceLoader:
f"Duplicate source name '{name}' in manifest shard {path}"
)
sources[name] = project_manifest_entry(name, entry)
description_sources[name] = _description_sources(
entry.descriptions, entry.description, entry.db_description
)
description_sources[name] = _description_sources(entry.descriptions)
# 2. Load files outside _schema/
for path in sorted(self.sources_dir.rglob("*.yaml")):
@ -138,11 +136,6 @@ class SourceLoader:
source = deepcopy(base)
description_sources = dict(base_description_sources or {})
# Overlay description semantics match the server: `description` writes the
# `user` source key, and `descriptions` merges keyed sources before a single
# visible description is resolved from the full map.
if overlay.get("description"):
description_sources["user"] = overlay["description"]
if overlay.get("descriptions"):
description_sources.update(
{
@ -151,7 +144,7 @@ class SourceLoader:
if text
}
)
if overlay.get("description") or overlay.get("descriptions"):
if overlay.get("descriptions"):
source.description = _resolve_description(
description_sources or None,
)

View file

@ -76,31 +76,17 @@ def map_column_type(db_type: str) -> str:
_DEFAULT_PRIORITY = ["user", "ai", "dbt", "db"]
def _description_sources(
descriptions: dict[str, str] | None,
description: str | None = None,
db_description: str | None = None,
) -> dict[str, str] | None:
def _description_sources(descriptions: dict[str, str] | None) -> dict[str, str] | None:
"""Normalize multi-source descriptions to a keyed map."""
if descriptions:
result = {source: text for source, text in descriptions.items() if text}
if result:
return result
result: dict[str, str] = {}
if description:
result["ai"] = description
if db_description:
result["db"] = db_description
return result or None
return None
def _resolve_description(
descriptions: dict[str, str] | None,
description: str | None = None,
db_description: str | None = None,
) -> str | None:
"""Resolve a single description from a multi-source map or legacy flat fields."""
def _resolve_description(descriptions: dict[str, str] | None) -> str | None:
"""Resolve a single description from a multi-source map."""
if descriptions:
for source in _DEFAULT_PRIORITY:
if text := descriptions.get(source):
@ -109,11 +95,6 @@ def _resolve_description(
for text in descriptions.values():
if text:
return text
# Legacy flat fields
if description:
return description
if db_description:
return db_description
return None
@ -123,18 +104,13 @@ class ManifestColumn(BaseModel):
pk: bool = False
nullable: bool = True
descriptions: dict[str, str] | None = None
# Legacy flat fields (backwards-compatible YAML parsing)
description: str | None = None
db_description: str | None = None
constraints: dict | None = None
enum_values: dict[str, list[str]] | None = None
tests: SourceColumnTests | None = None
@property
def resolved_description(self) -> str | None:
return _resolve_description(
self.descriptions, self.description, self.db_description
)
return _resolve_description(self.descriptions)
class ManifestJoin(BaseModel):
@ -147,9 +123,6 @@ class ManifestJoin(BaseModel):
class ManifestEntry(BaseModel):
table: str
descriptions: dict[str, str] | None = None
# Legacy flat fields (backwards-compatible YAML parsing)
description: str | None = None
db_description: str | None = None
columns: list[ManifestColumn]
joins: list[ManifestJoin] = []
default_time_dimension: DefaultTimeDimensionDbt | None = None
@ -158,9 +131,7 @@ class ManifestEntry(BaseModel):
@property
def resolved_description(self) -> str | None:
return _resolve_description(
self.descriptions, self.description, self.db_description
)
return _resolve_description(self.descriptions)
class Manifest(BaseModel):
@ -178,6 +149,8 @@ def validate_overlay(data: dict) -> list[str]:
Returns a list of error messages (empty if valid).
"""
errors: list[str] = []
if "description" in data:
errors.append("Overlay must use 'descriptions' for source descriptions")
if "table" in data:
errors.append("Overlay must not contain 'table' (owned by manifest)")
if "sql" in data:
@ -185,6 +158,10 @@ def validate_overlay(data: dict) -> list[str]:
"Overlay must not contain 'sql' (that makes it a standalone source)"
)
for col in data.get("columns", []):
if "description" in col:
errors.append(
f"Overlay column '{col.get('name', '?')}' must use 'descriptions'"
)
if "type" in col and "expr" not in col:
errors.append(
f"Overlay column '{col.get('name', '?')}' specifies 'type' without 'expr' "

View file

@ -1,10 +1,11 @@
name: churn_risk
description: |
Per-account churn risk scoring for B2B SaaS customers. Combines signals from
subscriptions (cancellation history), support tickets (severity, SLA breaches),
product usage (adoption decline), contracts (renewal proximity), CSM activities
(engagement recency), and invoices (payment issues) into a weighted composite
risk_score (0-1) and risk_tier (High/Medium/Low). One row per customer account.
descriptions:
user: |
Per-account churn risk scoring for B2B SaaS customers. Combines signals from
subscriptions (cancellation history), support tickets (severity, SLA breaches),
product usage (adoption decline), contracts (renewal proximity), CSM activities
(engagement recency), and invoices (payment issues) into a weighted composite
risk_score (0-1) and risk_tier (High/Medium/Low). One row per customer account.
sql: |
WITH sub_signals AS (
SELECT

View file

@ -1,7 +1,8 @@
name: churn_risk
description: |
Customer churn risk score combining tenure,
usage trends, and support burden.
descriptions:
user: |
Customer churn risk score combining tenure,
usage trends, and support burden.
sql: |
SELECT
c.id AS customer_id,

View file

@ -95,7 +95,7 @@ class TestProjectManifestEntry:
def orders_entry(self) -> ManifestEntry:
return ManifestEntry(
table="public.orders",
description="Customer orders",
descriptions={"user": "Customer orders"},
columns=[
ManifestColumn(name="id", type="integer", pk=True),
ManifestColumn(name="customer_id", type="integer"),
@ -202,7 +202,7 @@ class TestValidateOverlay:
def test_validate_overlay_valid(self):
data = {
"name": "orders",
"description": "Revenue-bearing orders",
"descriptions": {"user": "Revenue-bearing orders"},
"grain": ["id"],
"measures": [{"name": "revenue", "expr": "sum(total)"}],
"columns": [
@ -259,7 +259,7 @@ def _manifest_tables() -> dict:
"tables": {
"orders": {
"table": "public.orders",
"description": "Customer orders",
"descriptions": {"user": "Customer orders"},
"columns": [
{"name": "id", "type": "integer", "pk": True},
{"name": "customer_id", "type": "integer"},
@ -278,7 +278,7 @@ def _manifest_tables() -> dict:
},
"customers": {
"table": "public.customers",
"description": "Customer accounts",
"descriptions": {"user": "Customer accounts"},
"columns": [
{"name": "id", "type": "integer", "pk": True},
{"name": "name", "type": "varchar"},
@ -329,12 +329,12 @@ class TestTwoTierLoading:
assert sources["regions"].table == "public.regions"
assert sources["regions"].is_table_source
def test_overlay_descriptions_do_not_promote_base_description_to_user_source(
def test_overlay_descriptions_do_not_promote_base_map_to_user_source(
self, tmp_path: Path
):
standalone = {
"name": "regions",
"description": "Standalone description",
"descriptions": {"ai": "Standalone description"},
"table": "public.regions",
"grain": ["id"],
"columns": [
@ -376,7 +376,7 @@ class TestTwoTierLoading:
overlay = {
"name": "orders",
"description": "Revenue-bearing orders",
"descriptions": {"user": "Revenue-bearing orders"},
"grain": ["id"],
"measures": [{"name": "revenue", "expr": "sum(total)"}],
}
@ -394,11 +394,11 @@ class TestTwoTierLoading:
assert len(orders.measures) == 1
assert orders.measures[0].name == "revenue"
def test_overlay_description_override(self, tmp_path: Path):
def test_overlay_description_map_override(self, tmp_path: Path):
schema_dir = tmp_path / "_schema"
_write_yaml(schema_dir / "public.yaml", _manifest_tables())
overlay = {"name": "orders", "description": "Overridden description"}
overlay = {"name": "orders", "descriptions": {"user": "Overridden description"}}
_write_yaml(tmp_path / "orders.yaml", overlay)
_write_yaml(tmp_path / "customers.yaml", {"name": "customers"})
@ -426,7 +426,7 @@ class TestTwoTierLoading:
sources = loader.load_all()
assert sources["orders"].description == "Customer orders"
def test_overlay_descriptions_map_overrides_lower_priority_db_description(
def test_overlay_descriptions_map_overrides_lower_priority_db_source(
self, tmp_path: Path
):
schema_dir = tmp_path / "_schema"