docs: standardize fanout terminology (#218)

This commit is contained in:
Luca Martial 2026-05-25 11:09:33 -04:00 committed by GitHub
parent 4827437f3a
commit 924868841d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
19 changed files with 66 additions and 66 deletions

View file

@ -59,7 +59,7 @@ uv run python -m semantic_layer.cli --model /tmp/model.yaml \
-q '{"measures":["orders.revenue"],"dimensions":["customers.segment"]}' --suggest
```
### 3. Test fan-out / chasm traps
### 3. Test fanout / chasm traps
Add multiple measure sources that fan out from a shared dimension hub:

View file

@ -160,7 +160,7 @@ def print_plan(plan) -> None:
print(" Joins:")
for jp in plan.join_paths:
print(f" {jp}")
print(f" Fan-out: {plan.fan_out_description}")
print(f" Fanout: {plan.fan_out_description}")
if plan.aggregate_locality:
print(" Locality:")
for al in plan.aggregate_locality:

View file

@ -92,7 +92,7 @@ class SqlGenerator:
return "WITH " + source_header + ",\n" + rest
return "WITH " + source_header + "\n" + outer_transpiled
# ── Path A: Simple (no fan-out) ────────────────────────────────────
# ── Path A: Simple (no fanout) ────────────────────────────────────
def _generate_simple(
self, plan: ResolvedPlan, sources: dict[str, SourceDefinition]
@ -216,7 +216,7 @@ class SqlGenerator:
shared_dim_aliases = shared_dim_aliases or set()
shared_dims = [dk for dk in all_dim_keys if dk["alias"] in shared_dim_aliases]
# Validate grain consistency: asymmetric dims cause FULL JOIN fan-out
# Validate grain consistency: asymmetric dims cause FULL JOIN fanout
if len(plan.measure_groups) > 1:
for group in plan.measure_groups:
cte_dim_aliases = {

View file

@ -107,7 +107,7 @@ class QueryPlanner:
for e in tree.edges
]
# 8. Detect fan-out / chasm trap
# 8. Detect fanout / chasm trap
has_fan_out, measure_groups, fan_out_desc, locality_descs = (
self._detect_fan_out(measures, dimensions, tree, filters=query.filters)
)
@ -937,7 +937,7 @@ class QueryPlanner:
filters: list[str] | None = None,
) -> tuple[bool, list[MeasureGroup], str, list[str]]:
"""
Detect fan-out and chasm traps. Group measures by source.
Detect fanout and chasm traps. Group measures by source.
If multiple measure sources exist, each needs its own pre-aggregation CTE.
Also checks filter sources — a filter forcing a one_to_many join from the
measure source is an error (cannot be safely pre-aggregated).
@ -991,7 +991,7 @@ class QueryPlanner:
if len(groups) <= 1:
# Single measure group: check the path FROM measure source TO dimension sources.
# Only flag fan-out if those specific paths have one_to_many edges.
# Only flag fanout if those specific paths have one_to_many edges.
if groups:
source_name = next(iter(groups))
source_actual = self.graph.alias_map.get(source_name, source_name)
@ -999,7 +999,7 @@ class QueryPlanner:
for dim_src in dim_sources:
if dim_src == source_name:
continue
# Skip alias siblings (same underlying source — no fan-out)
# Skip alias siblings (same underlying source — no fanout)
dim_actual = self.graph.alias_map.get(dim_src, dim_src)
if dim_actual == source_actual:
continue
@ -1008,7 +1008,7 @@ class QueryPlanner:
has_o2m = True
break
# Also check filter sources for one_to_many fan-out
# Also check filter sources for one_to_many fanout
if not has_o2m:
for filter_src in filter_sources - dim_sources - {source_name}:
filter_actual = self.graph.alias_map.get(filter_src, filter_src)
@ -1019,7 +1019,7 @@ class QueryPlanner:
raise ValueError(
f"Filter on '{filter_src}' requires a one_to_many join "
f"from measure source '{source_name}', which would cause "
f"incorrect aggregation (fan-out). Consider rewriting the "
f"incorrect aggregation (fanout). Consider rewriting the "
f"filter as a subquery or adding the filter source as a "
f"dimension source."
)
@ -1033,10 +1033,10 @@ class QueryPlanner:
return (
True,
measure_groups,
f"Fan-out detected: one_to_many edges from {source_name} to dimensions",
f"Fanout detected: one_to_many edges from {source_name} to dimensions",
[f"Pre-aggregate {source_name} measures before joining"],
)
return False, [], "No fan-out", []
return False, [], "No fanout", []
# Multiple measure sources. Only merge groups that are provably row-safe
# (alias siblings or pure one_to_one chains). many_to_one chains are not
@ -1048,7 +1048,7 @@ class QueryPlanner:
# All measure sources are on the same safe join chain
if merged_groups:
mg_name, mg_measures = next(iter(merged_groups.items()))
# Still check if there's fan-out to dimension sources
# Still check if there's fanout to dimension sources
has_o2m = False
for dim_src in dim_sources:
if dim_src == mg_name:
@ -1061,10 +1061,10 @@ class QueryPlanner:
return (
True,
[MeasureGroup(source_name=mg_name, measures=mg_measures)],
f"Fan-out detected: one_to_many edges from {mg_name} to dimensions",
f"Fanout detected: one_to_many edges from {mg_name} to dimensions",
[f"Pre-aggregate {mg_name} measures before joining"],
)
return False, [], "No fan-out", []
return False, [], "No fanout", []
# True chasm trap — independent measure sources that can't be safely merged.
# Before building groups, validate that all filter sources are reachable

View file

@ -1,4 +1,4 @@
"""Dedicated tests for aggregate locality (fan-out/chasm trap correctness)."""
"""Dedicated tests for aggregate locality (fanout/chasm trap correctness)."""
import pytest
import sqlglot
@ -213,7 +213,7 @@ class TestNoFanOut:
sqlglot.parse(sql)
def test_m2o_join_no_ctes(self, ecommerce_sources):
"""orders → customers is m2o, no fan-out."""
"""orders → customers is m2o, no fanout."""
graph = JoinGraph(ecommerce_sources)
graph.build()
planner = QueryPlanner(ecommerce_sources, graph)
@ -540,7 +540,7 @@ class TestFactSideDimensionsInChasm:
"""LIMIT 1: Fact-side dimensions in chasm trap (local to one CTE only)."""
def test_fact_side_dimension_in_chasm_raises_error(self):
"""Asymmetric dim from fact_a only → raises error (would cause FULL JOIN fan-out)."""
"""Asymmetric dim from fact_a only → raises error (would cause FULL JOIN fanout)."""
hub = SourceDefinition(
name="hub",
table="public.hub",
@ -977,7 +977,7 @@ class TestBug13_FalseChasm_AliasAggregate:
dimensions=["billing_customer.name", "shipping_customer.name"],
)
plan = planner.plan(query)
assert not plan.has_fan_out, "Should not detect fan-out between alias siblings"
assert not plan.has_fan_out, "Should not detect fanout between alias siblings"
sql = gen.generate(plan, sources)
sqlglot.parse(sql)

View file

@ -305,12 +305,12 @@ class TestPredefinedMeasureDeps:
assert "GROUP BY" in sql.upper()
# ── Planner: fan-out with one_to_many to dimension sources (lines 595-643) ──
# ── Planner: fanout with one_to_many to dimension sources (lines 595-643) ──
class TestFanOutEdgeCases:
def test_single_source_fan_out_to_dimension(self):
"""Measure source with one_to_many to dimension should trigger fan-out."""
"""Measure source with one_to_many to dimension should trigger fanout."""
hub = SourceDefinition(
name="hub",
table="public.hub",

View file

@ -89,10 +89,10 @@ class TestCrossSourceM2O:
class TestFanOut:
"""Test 3: Fan-out (aggregate locality)."""
"""Test 3: Fanout (aggregate locality)."""
def test_orders_by_region_no_fanout(self, planner, generator, ecommerce_sources):
"""orders → customers → regions is all m2o. No fan-out needed."""
"""orders → customers → regions is all m2o. No fanout needed."""
sql = generate_sql(
planner,
generator,

View file

@ -200,12 +200,12 @@ class TestFanOutDetection:
class TestFanOutSingleSource:
"""Fan-out when a single measure source has o2m path to dimension source."""
"""Fanout when a single measure source has o2m path to dimension source."""
def test_reverse_path_fan_out(self):
"""Querying from customers (dimension) with measures from orders triggers fan-out
"""Querying from customers (dimension) with measures from orders triggers fanout
when the path from the measure source (orders) to the dimension source (customers)
is m2o so no fan-out. But reversed: measure on customers, dim on orders."""
is m2o so no fanout. But reversed: measure on customers, dim on orders."""
customers = SourceDefinition(
name="customers",
table="t",
@ -248,7 +248,7 @@ class TestFanOutSingleSource:
assert plan.has_fan_out
def test_m2o_multi_hop_no_fan_out(self, planner):
"""orders → customers → regions is all m2o. No fan-out."""
"""orders → customers → regions is all m2o. No fanout."""
query = SemanticQuery(
measures=["sum(orders.amount)"],
dimensions=["regions.name"],
@ -1116,7 +1116,7 @@ class TestDerivedMeasureEdgeCases:
assert_valid_sql(result.sql)
# ── From test_edge_cases.py: filter fan-out detection ────────────────
# ── From test_edge_cases.py: filter fanout detection ────────────────
class TestFilterFanOutDetection: