mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-07 07:55:13 +02:00
111 lines
3.7 KiB
Python
111 lines
3.7 KiB
Python
|
|
#!/usr/bin/env python3
|
||
|
|
"""Run a semantic layer query against the b2b_saas SQLite database.
|
||
|
|
|
||
|
|
Usage:
|
||
|
|
uv run python scripts/slquery.py '{"measures":["count(opportunities.opportunity_id)"],"dimensions":["accounts.segment"]}'
|
||
|
|
uv run python scripts/slquery.py '{"measures":["churn_risk.avg_risk_score"],"dimensions":["accounts.industry"]}'
|
||
|
|
echo '{"measures":["sum(contracts.arr)"],"dimensions":["accounts.segment"]}' | uv run python scripts/slquery.py --stdin
|
||
|
|
"""
|
||
|
|
|
||
|
|
from __future__ import annotations
|
||
|
|
|
||
|
|
import argparse
|
||
|
|
import json
|
||
|
|
import os
|
||
|
|
import re
|
||
|
|
import sqlite3
|
||
|
|
import sys
|
||
|
|
from pathlib import Path
|
||
|
|
|
||
|
|
from semantic_layer.engine import SemanticEngine
|
||
|
|
|
||
|
|
SOURCES_DIR = Path(__file__).resolve().parent.parent / "sources" / "b2b_saas"
|
||
|
|
DB_PATH = Path(
|
||
|
|
os.environ.get("KLO_B2B_SQLITE_DB", "sample-data-generator/b2b_data.db")
|
||
|
|
).expanduser()
|
||
|
|
|
||
|
|
# sqlglot's sqlite dialect handles most transpilation, but has a few gaps.
|
||
|
|
# These fixups patch what sqlglot misses.
|
||
|
|
_SQLITE_FIXUPS = [
|
||
|
|
# GROUP_CONCAT(DISTINCT x, sep) → GROUP_CONCAT(DISTINCT x) — sqlite
|
||
|
|
# only allows 1 arg with DISTINCT
|
||
|
|
(r"GROUP_CONCAT\(DISTINCT (\w+),\s*'[^']*'\)", r"GROUP_CONCAT(DISTINCT \1)"),
|
||
|
|
# CURRENT_DATE - col → integer days via julianday
|
||
|
|
(
|
||
|
|
r"CURRENT_DATE - DATE\((\w+)\)",
|
||
|
|
r"CAST(julianday('now') - julianday(\1) AS INTEGER)",
|
||
|
|
),
|
||
|
|
(r"CURRENT_DATE - (\w+)", r"CAST(julianday('now') - julianday(\1) AS INTEGER)"),
|
||
|
|
# col - CURRENT_DATE → integer days via julianday
|
||
|
|
(r"(\w+) - CURRENT_DATE", r"CAST(julianday(\1) - julianday('now') AS INTEGER)"),
|
||
|
|
# CURRENT_DATE > col → julianday comparison
|
||
|
|
(r"CURRENT_DATE > (\w+)", r"julianday('now') > julianday(\1)"),
|
||
|
|
# NULLS LAST — not supported in sqlite
|
||
|
|
(r"\s+NULLS LAST", ""),
|
||
|
|
]
|
||
|
|
|
||
|
|
|
||
|
|
def fixup_sqlite(sql: str) -> str:
|
||
|
|
for pattern, repl in _SQLITE_FIXUPS:
|
||
|
|
sql = re.sub(pattern, repl, sql)
|
||
|
|
return sql
|
||
|
|
|
||
|
|
|
||
|
|
def main() -> None:
|
||
|
|
p = argparse.ArgumentParser(description="Run SL query against b2b_saas SQLite DB")
|
||
|
|
p.add_argument("query", nargs="?", help="JSON query string")
|
||
|
|
p.add_argument("--stdin", action="store_true", help="Read JSON from stdin")
|
||
|
|
p.add_argument(
|
||
|
|
"--sql-only", action="store_true", help="Print SQL without executing"
|
||
|
|
)
|
||
|
|
p.add_argument("--db", default=str(DB_PATH), help="Path to SQLite database")
|
||
|
|
p.add_argument(
|
||
|
|
"--sources", default=str(SOURCES_DIR), help="Path to sources directory"
|
||
|
|
)
|
||
|
|
args = p.parse_args()
|
||
|
|
|
||
|
|
if args.stdin:
|
||
|
|
query_dict = json.loads(sys.stdin.read())
|
||
|
|
elif args.query:
|
||
|
|
query_dict = json.loads(args.query)
|
||
|
|
else:
|
||
|
|
p.error("Provide a JSON query string or use --stdin")
|
||
|
|
|
||
|
|
# Use sqlite dialect — sqlglot handles STRING_AGG→GROUP_CONCAT,
|
||
|
|
# DECIMAL→REAL, ::DATE→DATE(), etc.
|
||
|
|
engine = SemanticEngine(args.sources, dialect="sqlite")
|
||
|
|
result = engine.query(query_dict)
|
||
|
|
sql = fixup_sqlite(result.sql)
|
||
|
|
|
||
|
|
if args.sql_only:
|
||
|
|
print(sql)
|
||
|
|
return
|
||
|
|
|
||
|
|
conn = sqlite3.connect(args.db)
|
||
|
|
conn.row_factory = sqlite3.Row
|
||
|
|
try:
|
||
|
|
rows = conn.execute(sql).fetchall()
|
||
|
|
except sqlite3.OperationalError as e:
|
||
|
|
print(f"SQL error: {e}", file=sys.stderr)
|
||
|
|
print(f"\nGenerated SQL:\n{sql}", file=sys.stderr)
|
||
|
|
sys.exit(1)
|
||
|
|
finally:
|
||
|
|
conn.close()
|
||
|
|
|
||
|
|
if not rows:
|
||
|
|
print("(no rows)")
|
||
|
|
return
|
||
|
|
|
||
|
|
cols = rows[0].keys()
|
||
|
|
widths = [max(len(str(c)), max(len(str(r[c])) for r in rows)) for c in cols]
|
||
|
|
header = " ".join(str(c).ljust(w) for c, w in zip(cols, widths))
|
||
|
|
sep = " ".join("-" * w for w in widths)
|
||
|
|
print(header)
|
||
|
|
print(sep)
|
||
|
|
for r in rows:
|
||
|
|
print(" ".join(str(r[c]).ljust(w) for c, w in zip(cols, widths)))
|
||
|
|
|
||
|
|
|
||
|
|
if __name__ == "__main__":
|
||
|
|
main()
|