mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-07 07:55:13 +02:00
docs: standardize fanout terminology (#218)
This commit is contained in:
parent
4827437f3a
commit
924868841d
19 changed files with 66 additions and 66 deletions
|
|
@@ -59,7 +59,7 @@ uv run python -m semantic_layer.cli --model /tmp/model.yaml \
|
|||
-q '{"measures":["orders.revenue"],"dimensions":["customers.segment"]}' --suggest
|
||||
```
|
||||
|
||||
### 3. Test fan-out / chasm traps
|
||||
### 3. Test fanout / chasm traps
|
||||
|
||||
Add multiple measure sources that fan out from a shared dimension hub:
|
||||
|
||||
|
|
|
|||
|
|
@@ -160,7 +160,7 @@ def print_plan(plan) -> None:
|
|||
print(" Joins:")
|
||||
for jp in plan.join_paths:
|
||||
print(f" {jp}")
|
||||
print(f" Fan-out: {plan.fan_out_description}")
|
||||
print(f" Fanout: {plan.fan_out_description}")
|
||||
if plan.aggregate_locality:
|
||||
print(" Locality:")
|
||||
for al in plan.aggregate_locality:
|
||||
|
|
|
|||
|
|
@@ -92,7 +92,7 @@ class SqlGenerator:
|
|||
return "WITH " + source_header + ",\n" + rest
|
||||
return "WITH " + source_header + "\n" + outer_transpiled
|
||||
|
||||
# ── Path A: Simple (no fan-out) ────────────────────────────────────
|
||||
# ── Path A: Simple (no fanout) ────────────────────────────────────
|
||||
|
||||
def _generate_simple(
|
||||
self, plan: ResolvedPlan, sources: dict[str, SourceDefinition]
|
||||
|
|
@@ -216,7 +216,7 @@ class SqlGenerator:
|
|||
shared_dim_aliases = shared_dim_aliases or set()
|
||||
shared_dims = [dk for dk in all_dim_keys if dk["alias"] in shared_dim_aliases]
|
||||
|
||||
# Validate grain consistency: asymmetric dims cause FULL JOIN fan-out
|
||||
# Validate grain consistency: asymmetric dims cause FULL JOIN fanout
|
||||
if len(plan.measure_groups) > 1:
|
||||
for group in plan.measure_groups:
|
||||
cte_dim_aliases = {
|
||||
|
|
|
|||
|
|
@@ -107,7 +107,7 @@ class QueryPlanner:
|
|||
for e in tree.edges
|
||||
]
|
||||
|
||||
# 8. Detect fan-out / chasm trap
|
||||
# 8. Detect fanout / chasm trap
|
||||
has_fan_out, measure_groups, fan_out_desc, locality_descs = (
|
||||
self._detect_fan_out(measures, dimensions, tree, filters=query.filters)
|
||||
)
|
||||
|
|
@@ -937,7 +937,7 @@ class QueryPlanner:
|
|||
filters: list[str] | None = None,
|
||||
) -> tuple[bool, list[MeasureGroup], str, list[str]]:
|
||||
"""
|
||||
Detect fan-out and chasm traps. Group measures by source.
|
||||
Detect fanout and chasm traps. Group measures by source.
|
||||
If multiple measure sources exist, each needs its own pre-aggregation CTE.
|
||||
Also checks filter sources — a filter forcing a one_to_many join from the
|
||||
measure source is an error (cannot be safely pre-aggregated).
|
||||
|
|
@@ -991,7 +991,7 @@ class QueryPlanner:
|
|||
|
||||
if len(groups) <= 1:
|
||||
# Single measure group: check the path FROM measure source TO dimension sources.
|
||||
# Only flag fan-out if those specific paths have one_to_many edges.
|
||||
# Only flag fanout if those specific paths have one_to_many edges.
|
||||
if groups:
|
||||
source_name = next(iter(groups))
|
||||
source_actual = self.graph.alias_map.get(source_name, source_name)
|
||||
|
|
@@ -999,7 +999,7 @@ class QueryPlanner:
|
|||
for dim_src in dim_sources:
|
||||
if dim_src == source_name:
|
||||
continue
|
||||
# Skip alias siblings (same underlying source — no fan-out)
|
||||
# Skip alias siblings (same underlying source — no fanout)
|
||||
dim_actual = self.graph.alias_map.get(dim_src, dim_src)
|
||||
if dim_actual == source_actual:
|
||||
continue
|
||||
|
|
@@ -1008,7 +1008,7 @@ class QueryPlanner:
|
|||
has_o2m = True
|
||||
break
|
||||
|
||||
# Also check filter sources for one_to_many fan-out
|
||||
# Also check filter sources for one_to_many fanout
|
||||
if not has_o2m:
|
||||
for filter_src in filter_sources - dim_sources - {source_name}:
|
||||
filter_actual = self.graph.alias_map.get(filter_src, filter_src)
|
||||
|
|
@@ -1019,7 +1019,7 @@ class QueryPlanner:
|
|||
raise ValueError(
|
||||
f"Filter on '{filter_src}' requires a one_to_many join "
|
||||
f"from measure source '{source_name}', which would cause "
|
||||
f"incorrect aggregation (fan-out). Consider rewriting the "
|
||||
f"incorrect aggregation (fanout). Consider rewriting the "
|
||||
f"filter as a subquery or adding the filter source as a "
|
||||
f"dimension source."
|
||||
)
|
||||
|
|
@@ -1033,10 +1033,10 @@ class QueryPlanner:
|
|||
return (
|
||||
True,
|
||||
measure_groups,
|
||||
f"Fan-out detected: one_to_many edges from {source_name} to dimensions",
|
||||
f"Fanout detected: one_to_many edges from {source_name} to dimensions",
|
||||
[f"Pre-aggregate {source_name} measures before joining"],
|
||||
)
|
||||
return False, [], "No fan-out", []
|
||||
return False, [], "No fanout", []
|
||||
|
||||
# Multiple measure sources. Only merge groups that are provably row-safe
|
||||
# (alias siblings or pure one_to_one chains). many_to_one chains are not
|
||||
|
|
@@ -1048,7 +1048,7 @@ class QueryPlanner:
|
|||
# All measure sources are on the same safe join chain
|
||||
if merged_groups:
|
||||
mg_name, mg_measures = next(iter(merged_groups.items()))
|
||||
# Still check if there's fan-out to dimension sources
|
||||
# Still check if there's fanout to dimension sources
|
||||
has_o2m = False
|
||||
for dim_src in dim_sources:
|
||||
if dim_src == mg_name:
|
||||
|
|
@@ -1061,10 +1061,10 @@ class QueryPlanner:
|
|||
return (
|
||||
True,
|
||||
[MeasureGroup(source_name=mg_name, measures=mg_measures)],
|
||||
f"Fan-out detected: one_to_many edges from {mg_name} to dimensions",
|
||||
f"Fanout detected: one_to_many edges from {mg_name} to dimensions",
|
||||
[f"Pre-aggregate {mg_name} measures before joining"],
|
||||
)
|
||||
return False, [], "No fan-out", []
|
||||
return False, [], "No fanout", []
|
||||
|
||||
# True chasm trap — independent measure sources that can't be safely merged.
|
||||
# Before building groups, validate that all filter sources are reachable
|
||||
|
|
|
|||
|
|
@@ -1,4 +1,4 @@
|
|||
"""Dedicated tests for aggregate locality (fan-out/chasm trap correctness)."""
|
||||
"""Dedicated tests for aggregate locality (fanout/chasm trap correctness)."""
|
||||
|
||||
import pytest
|
||||
import sqlglot
|
||||
|
|
@@ -213,7 +213,7 @@ class TestNoFanOut:
|
|||
sqlglot.parse(sql)
|
||||
|
||||
def test_m2o_join_no_ctes(self, ecommerce_sources):
|
||||
"""orders → customers is m2o, no fan-out."""
|
||||
"""orders → customers is m2o, no fanout."""
|
||||
graph = JoinGraph(ecommerce_sources)
|
||||
graph.build()
|
||||
planner = QueryPlanner(ecommerce_sources, graph)
|
||||
|
|
@@ -540,7 +540,7 @@ class TestFactSideDimensionsInChasm:
|
|||
"""LIMIT 1: Fact-side dimensions in chasm trap (local to one CTE only)."""
|
||||
|
||||
def test_fact_side_dimension_in_chasm_raises_error(self):
|
||||
"""Asymmetric dim from fact_a only → raises error (would cause FULL JOIN fan-out)."""
|
||||
"""Asymmetric dim from fact_a only → raises error (would cause FULL JOIN fanout)."""
|
||||
hub = SourceDefinition(
|
||||
name="hub",
|
||||
table="public.hub",
|
||||
|
|
@@ -977,7 +977,7 @@ class TestBug13_FalseChasm_AliasAggregate:
|
|||
dimensions=["billing_customer.name", "shipping_customer.name"],
|
||||
)
|
||||
plan = planner.plan(query)
|
||||
assert not plan.has_fan_out, "Should not detect fan-out between alias siblings"
|
||||
assert not plan.has_fan_out, "Should not detect fanout between alias siblings"
|
||||
sql = gen.generate(plan, sources)
|
||||
sqlglot.parse(sql)
|
||||
|
||||
|
|
|
|||
|
|
@@ -305,12 +305,12 @@ class TestPredefinedMeasureDeps:
|
|||
assert "GROUP BY" in sql.upper()
|
||||
|
||||
|
||||
# ── Planner: fan-out with one_to_many to dimension sources (lines 595-643) ──
|
||||
# ── Planner: fanout with one_to_many to dimension sources (lines 595-643) ──
|
||||
|
||||
|
||||
class TestFanOutEdgeCases:
|
||||
def test_single_source_fan_out_to_dimension(self):
|
||||
"""Measure source with one_to_many to dimension should trigger fan-out."""
|
||||
"""Measure source with one_to_many to dimension should trigger fanout."""
|
||||
hub = SourceDefinition(
|
||||
name="hub",
|
||||
table="public.hub",
|
||||
|
|
|
|||
|
|
@@ -89,10 +89,10 @@ class TestCrossSourceM2O:
|
|||
|
||||
|
||||
class TestFanOut:
|
||||
"""Test 3: Fan-out (aggregate locality)."""
|
||||
"""Test 3: Fanout (aggregate locality)."""
|
||||
|
||||
def test_orders_by_region_no_fanout(self, planner, generator, ecommerce_sources):
|
||||
"""orders → customers → regions is all m2o. No fan-out needed."""
|
||||
"""orders → customers → regions is all m2o. No fanout needed."""
|
||||
sql = generate_sql(
|
||||
planner,
|
||||
generator,
|
||||
|
|
|
|||
|
|
@@ -200,12 +200,12 @@ class TestFanOutDetection:
|
|||
|
||||
|
||||
class TestFanOutSingleSource:
|
||||
"""Fan-out when a single measure source has o2m path to dimension source."""
|
||||
"""Fanout when a single measure source has o2m path to dimension source."""
|
||||
|
||||
def test_reverse_path_fan_out(self):
|
||||
"""Querying from customers (dimension) with measures from orders triggers fan-out
|
||||
"""Querying from customers (dimension) with measures from orders triggers fanout
|
||||
when the path from the measure source (orders) to the dimension source (customers)
|
||||
is m2o — so no fan-out. But reversed: measure on customers, dim on orders."""
|
||||
is m2o — so no fanout. But reversed: measure on customers, dim on orders."""
|
||||
customers = SourceDefinition(
|
||||
name="customers",
|
||||
table="t",
|
||||
|
|
@@ -248,7 +248,7 @@ class TestFanOutSingleSource:
|
|||
assert plan.has_fan_out
|
||||
|
||||
def test_m2o_multi_hop_no_fan_out(self, planner):
|
||||
"""orders → customers → regions is all m2o. No fan-out."""
|
||||
"""orders → customers → regions is all m2o. No fanout."""
|
||||
query = SemanticQuery(
|
||||
measures=["sum(orders.amount)"],
|
||||
dimensions=["regions.name"],
|
||||
|
|
@@ -1116,7 +1116,7 @@ class TestDerivedMeasureEdgeCases:
|
|||
assert_valid_sql(result.sql)
|
||||
|
||||
|
||||
# ── From test_edge_cases.py: filter fan-out detection ────────────────
|
||||
# ── From test_edge_cases.py: filter fanout detection ────────────────
|
||||
|
||||
|
||||
class TestFilterFanOutDetection:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue