mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-10 08:05:14 +02:00
Initial open-source release
This commit is contained in:
commit
1a42152e6f
1199 changed files with 257054 additions and 0 deletions
166
python/klo-sl/scripts/tpch_runner.py
Normal file
166
python/klo-sl/scripts/tpch_runner.py
Normal file
|
|
@ -0,0 +1,166 @@
|
|||
#!/usr/bin/env python3
|
||||
"""Run TPC-H queries end-to-end: generate data + semantic layer SQL + execute.
|
||||
|
||||
Usage:
|
||||
uv run python scripts/tpch_runner.py
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
|
||||
import duckdb
|
||||
import sqlglot
|
||||
|
||||
from semantic_layer.engine import SemanticEngine
|
||||
|
||||
TPCH_TABLES = [
|
||||
"region",
|
||||
"nation",
|
||||
"supplier",
|
||||
"customer",
|
||||
"part",
|
||||
"partsupp",
|
||||
"orders",
|
||||
"lineitem",
|
||||
]
|
||||
|
||||
|
||||
def setup_tpch(sf: float = 0.01) -> duckdb.DuckDBPyConnection:
|
||||
"""Create in-memory DuckDB with TPC-H data at the given scale factor."""
|
||||
conn = duckdb.connect()
|
||||
conn.execute("INSTALL tpch; LOAD tpch")
|
||||
conn.execute(f"CALL dbgen(sf={sf})")
|
||||
# YAML files use public.<table> — create views to match
|
||||
conn.execute("CREATE SCHEMA IF NOT EXISTS public")
|
||||
for t in TPCH_TABLES:
|
||||
conn.execute(f"CREATE VIEW public.{t} AS SELECT * FROM main.{t}")
|
||||
return conn
|
||||
|
||||
|
||||
def run_query(
|
||||
conn: duckdb.DuckDBPyConnection,
|
||||
engine: SemanticEngine,
|
||||
title: str,
|
||||
query_dict: dict,
|
||||
) -> None:
|
||||
"""Generate SQL via semantic layer, execute it, and print results."""
|
||||
print(f"\n{'=' * 60}")
|
||||
print(f" {title}")
|
||||
print(f"{'=' * 60}")
|
||||
|
||||
print("\n>> Request:")
|
||||
print(json.dumps(query_dict, indent=2))
|
||||
|
||||
result = engine.query(query_dict)
|
||||
formatted_sql = sqlglot.transpile(
|
||||
result.sql, read=result.dialect, write=result.dialect, pretty=True
|
||||
)[0]
|
||||
print(f"\n-- dialect: {result.dialect}")
|
||||
print(formatted_sql)
|
||||
|
||||
cursor = conn.execute(result.sql)
|
||||
col_names = [desc[0] for desc in cursor.description]
|
||||
rows = cursor.fetchall()
|
||||
|
||||
# Simple table formatting
|
||||
widths = [
|
||||
max(len(str(c)), *(len(str(r[i])) for r in rows))
|
||||
for i, c in enumerate(col_names)
|
||||
]
|
||||
header = " ".join(str(c).ljust(w) for c, w in zip(col_names, widths))
|
||||
print(f"\n{header}")
|
||||
print(" ".join("-" * w for w in widths))
|
||||
for row in rows:
|
||||
print(" ".join(str(v).ljust(w) for v, w in zip(row, widths)))
|
||||
print(f"\n({len(rows)} rows)")
|
||||
|
||||
|
||||
def main() -> None:
|
||||
conn = setup_tpch()
|
||||
engine = SemanticEngine("sources/tpch", dialect="duckdb")
|
||||
|
||||
# Q1: Pricing summary by return flag / line status
|
||||
run_query(
|
||||
conn,
|
||||
engine,
|
||||
"Q1: Pricing Summary",
|
||||
{
|
||||
"measures": [
|
||||
"lineitem.revenue",
|
||||
"lineitem.total_quantity",
|
||||
"lineitem.avg_discount",
|
||||
"lineitem.line_count",
|
||||
],
|
||||
"dimensions": ["lineitem.l_returnflag", "lineitem.l_linestatus"],
|
||||
},
|
||||
)
|
||||
|
||||
# Q5-style: Revenue by nation (4-hop join) with ASIA filter
|
||||
run_query(
|
||||
conn,
|
||||
engine,
|
||||
"Q5: Revenue by Nation (ASIA)",
|
||||
{
|
||||
"measures": ["lineitem.revenue"],
|
||||
"dimensions": ["nation.n_name"],
|
||||
"filters": ["region.r_name = 'ASIA'"],
|
||||
},
|
||||
)
|
||||
|
||||
# Q3-style: Revenue by order month for BUILDING segment
|
||||
run_query(
|
||||
conn,
|
||||
engine,
|
||||
"Q3: Revenue by Month (BUILDING)",
|
||||
{
|
||||
"measures": ["lineitem.revenue"],
|
||||
"dimensions": [{"field": "orders.o_orderdate", "granularity": "month"}],
|
||||
"filters": ["customer.c_mktsegment = 'BUILDING'"],
|
||||
"limit": 12,
|
||||
},
|
||||
)
|
||||
|
||||
# Q10-style: Returned revenue by customer (filtered measure)
|
||||
run_query(
|
||||
conn,
|
||||
engine,
|
||||
"Q10: Returned Revenue by Customer",
|
||||
{
|
||||
"measures": ["lineitem.returned_revenue"],
|
||||
"dimensions": ["customer.c_name"],
|
||||
"order_by": [{"field": "lineitem.returned_revenue", "direction": "desc"}],
|
||||
"limit": 10,
|
||||
},
|
||||
)
|
||||
|
||||
# Multi-measure: revenue + charge + counts
|
||||
run_query(
|
||||
conn,
|
||||
engine,
|
||||
"Multi-measure: Revenue, Charge, Counts",
|
||||
{
|
||||
"measures": [
|
||||
"lineitem.revenue",
|
||||
"lineitem.charge",
|
||||
"orders.order_count",
|
||||
],
|
||||
"dimensions": ["customer.c_mktsegment"],
|
||||
},
|
||||
)
|
||||
|
||||
# Supply cost by nation (through partsupp bridge)
|
||||
run_query(
|
||||
conn,
|
||||
engine,
|
||||
"Supply Cost by Nation",
|
||||
{
|
||||
"measures": ["partsupp.total_supply_cost"],
|
||||
"dimensions": ["nation.n_name"],
|
||||
"limit": 10,
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Loading…
Add table
Add a link
Reference in a new issue