mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-07 07:55:13 +02:00
222 lines
5.7 KiB
YAML
222 lines
5.7 KiB
YAML
# Complex CTE Runtime Join Demo
|
|
#
|
|
# Demonstrates:
|
|
# 1. Two SQL sources with internal CTEs (customer_lifetime_value, churn_risk)
|
|
# 2. Both join to `customers` at the source level (many_to_one)
|
|
# 3. `customers` joins to `regions` (many_to_one)
|
|
# 4. A query requesting measures from BOTH SQL sources + dimensions from `regions`
|
|
# triggers chasm trap detection and aggregate locality
|
|
#
|
|
# Join graph:
|
|
# customer_lifetime_value --m2o--> customers --m2o--> regions
|
|
# churn_risk --m2o--> customers --m2o--> regions
|
|
|
|
# --- Table sources ---
|
|
|
|
- name: regions
|
|
table: public.regions
|
|
grain: [id]
|
|
columns:
|
|
- name: id
|
|
type: number
|
|
- name: name
|
|
type: string
|
|
- name: continent
|
|
type: string
|
|
|
|
- name: customers
|
|
table: public.customers
|
|
grain: [id]
|
|
columns:
|
|
- name: id
|
|
type: number
|
|
- name: name
|
|
type: string
|
|
- name: segment
|
|
type: string
|
|
- name: region_id
|
|
type: number
|
|
- name: signed_at
|
|
type: time
|
|
role: time
|
|
- name: arr
|
|
type: number
|
|
joins:
|
|
- to: regions
|
|
"on": region_id = regions.id
|
|
relationship: many_to_one
|
|
|
|
- name: orders
|
|
table: public.orders
|
|
grain: [id]
|
|
columns:
|
|
- name: id
|
|
type: number
|
|
- name: customer_id
|
|
type: number
|
|
- name: amount
|
|
type: number
|
|
- name: created_at
|
|
type: time
|
|
role: time
|
|
joins:
|
|
- to: customers
|
|
"on": customer_id = customers.id
|
|
relationship: many_to_one
|
|
|
|
- name: order_items
|
|
table: public.order_items
|
|
grain: [id]
|
|
columns:
|
|
- name: id
|
|
type: number
|
|
- name: order_id
|
|
type: number
|
|
- name: quantity
|
|
type: number
|
|
- name: unit_price
|
|
type: number
|
|
joins:
|
|
- to: orders
|
|
"on": order_id = orders.id
|
|
relationship: many_to_one
|
|
|
|
# --- SQL source: Customer Lifetime Value (uses internal CTEs) ---
|
|
|
|
- name: customer_lifetime_value
|
|
description: |
|
|
Customer lifetime value estimate using monthly revenue cohort analysis.
|
|
Internal CTEs aggregate orders+order_items by month, then compute
|
|
active_months and avg_mrr per customer before estimating LTV.
|
|
sql: |
|
|
WITH monthly_revenue AS (
|
|
SELECT
|
|
o.customer_id,
|
|
DATE_TRUNC('month', o.created_at) AS month,
|
|
SUM(oi.quantity * oi.unit_price) AS mrr
|
|
FROM orders o
|
|
JOIN order_items oi ON o.id = oi.order_id
|
|
GROUP BY o.customer_id, DATE_TRUNC('month', o.created_at)
|
|
),
|
|
cohort_stats AS (
|
|
SELECT
|
|
customer_id,
|
|
MIN(month) AS first_month,
|
|
COUNT(DISTINCT month) AS active_months,
|
|
AVG(mrr) AS avg_mrr
|
|
FROM monthly_revenue
|
|
GROUP BY customer_id
|
|
)
|
|
SELECT
|
|
cs.customer_id,
|
|
cs.first_month,
|
|
cs.active_months,
|
|
cs.avg_mrr,
|
|
cs.avg_mrr * cs.active_months * 1.2 AS ltv_estimate
|
|
FROM cohort_stats cs
|
|
grain: [customer_id]
|
|
columns:
|
|
- name: customer_id
|
|
type: number
|
|
- name: first_month
|
|
type: time
|
|
- name: active_months
|
|
type: number
|
|
- name: avg_mrr
|
|
type: number
|
|
- name: ltv_estimate
|
|
type: number
|
|
joins:
|
|
- to: customers
|
|
"on": customer_id = customers.id
|
|
relationship: many_to_one
|
|
measures:
|
|
- name: avg_ltv
|
|
expr: avg(ltv_estimate)
|
|
description: "Average customer lifetime value"
|
|
- name: total_ltv
|
|
expr: sum(ltv_estimate)
|
|
description: "Total lifetime value across customers"
|
|
- name: avg_active_months
|
|
expr: avg(active_months)
|
|
description: "Average number of active months per customer"
|
|
|
|
# --- SQL source: Churn Risk (uses internal CTEs) ---
|
|
|
|
- name: churn_risk
|
|
description: |
|
|
Customer churn risk score combining recency, frequency, and support burden.
|
|
Internal CTEs compute rfm_scores from orders and ticket_counts from a
|
|
support table before producing a weighted composite score.
|
|
sql: |
|
|
WITH rfm_scores AS (
|
|
SELECT
|
|
customer_id,
|
|
EXTRACT(DAY FROM NOW() - MAX(created_at)) AS days_since_last_order,
|
|
COUNT(*) AS order_frequency,
|
|
AVG(amount) AS avg_order_value
|
|
FROM orders
|
|
GROUP BY customer_id
|
|
),
|
|
ticket_counts AS (
|
|
SELECT
|
|
customer_id,
|
|
COUNT(*) AS open_tickets,
|
|
AVG(EXTRACT(DAY FROM resolved_at - created_at)) AS avg_resolution_days
|
|
FROM support_tickets
|
|
WHERE status = 'open'
|
|
GROUP BY customer_id
|
|
)
|
|
SELECT
|
|
r.customer_id,
|
|
r.days_since_last_order,
|
|
r.order_frequency,
|
|
COALESCE(t.open_tickets, 0) AS open_tickets,
|
|
CASE
|
|
WHEN r.days_since_last_order > 180 THEN 0.9
|
|
WHEN r.days_since_last_order > 90 THEN 0.6
|
|
ELSE 0.2
|
|
END * 0.4
|
|
+ CASE
|
|
WHEN r.order_frequency < 2 THEN 0.8
|
|
WHEN r.order_frequency < 5 THEN 0.4
|
|
ELSE 0.1
|
|
END * 0.3
|
|
+ CASE
|
|
WHEN COALESCE(t.open_tickets, 0) > 3 THEN 0.9
|
|
WHEN COALESCE(t.open_tickets, 0) > 1 THEN 0.5
|
|
ELSE 0.1
|
|
END * 0.3 AS score,
|
|
CASE
|
|
WHEN r.avg_order_value < 100 THEN 'SMB'
|
|
WHEN r.avg_order_value < 1000 THEN 'Mid-Market'
|
|
ELSE 'Enterprise'
|
|
END AS customer_type
|
|
FROM rfm_scores r
|
|
LEFT JOIN ticket_counts t ON r.customer_id = t.customer_id
|
|
grain: [customer_id]
|
|
columns:
|
|
- name: customer_id
|
|
type: number
|
|
- name: days_since_last_order
|
|
type: number
|
|
- name: order_frequency
|
|
type: number
|
|
- name: open_tickets
|
|
type: number
|
|
- name: score
|
|
type: number
|
|
- name: customer_type
|
|
type: string
|
|
joins:
|
|
- to: customers
|
|
"on": customer_id = customers.id
|
|
relationship: many_to_one
|
|
measures:
|
|
- name: avg_risk
|
|
expr: avg(score)
|
|
description: "Average churn risk score"
|
|
- name: high_risk_count
|
|
expr: count(customer_id)
|
|
filter: "score > 0.7"
|
|
description: "Number of high-risk customers"
|