mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-19 08:28:06 +02:00
Initial open-source release
This commit is contained in:
commit
1a42152e6f
1199 changed files with 257054 additions and 0 deletions
115
examples/postgres-historic/README.md
Normal file
115
examples/postgres-historic/README.md
Normal file
|
|
@ -0,0 +1,115 @@
|
|||
# Postgres Historic SQL Example
|
||||
|
||||
This example is a manual smoke test for Postgres historic-SQL ingest through
|
||||
`pg_stat_statements`. It starts Postgres 14 with the extension preloaded,
|
||||
generates query workload under separate users, runs `klo setup` with
|
||||
`--enable-historic-sql`, and verifies three local ingest runs:
|
||||
|
||||
- first run creates a fresh PGSS baseline
|
||||
- second run emits only positive deltas
|
||||
- reset run treats `pg_stat_statements_reset()` as a fresh baseline
|
||||
|
||||
## Prerequisites
|
||||
|
||||
- Docker with Compose v2
|
||||
- Node and pnpm matching the KLO workspace
|
||||
- `python-service/.venv` already created, or `KLO_SQL_ANALYSIS_URL` pointing at
|
||||
a running service that exposes `/api/sql/analyze-for-fingerprint`
|
||||
|
||||
## Run
|
||||
|
||||
From the KLO repository root:
|
||||
|
||||
```bash
|
||||
examples/postgres-historic/scripts/smoke.sh
|
||||
```
|
||||
|
||||
The smoke creates a temporary KLO project, starts Postgres on
|
||||
`127.0.0.1:55432`, and uses this connection URL:
|
||||
|
||||
```bash
|
||||
postgresql://klo_reader:klo_reader@127.0.0.1:55432/analytics # pragma: allowlist secret
|
||||
```
|
||||
|
||||
Set `KLO_POSTGRES_HISTORIC_KEEP_DOCKER=1` to leave the container running after
|
||||
the script exits.
|
||||
|
||||
The smoke validates the historic-SQL raw snapshot path without requiring LLM
|
||||
credentials. It uses KLO's local stage-only ingest API after `klo setup` so the
|
||||
PGSS baseline and delta behavior can be checked independently from curation.
|
||||
|
||||
## Manual Commands
|
||||
|
||||
Start Postgres and generate the base workload:
|
||||
|
||||
```bash
|
||||
docker compose -f examples/postgres-historic/docker-compose.yml up -d --wait
|
||||
examples/postgres-historic/scripts/generate-workload.sh base
|
||||
```
|
||||
|
||||
Create a project and enable historic SQL:
|
||||
|
||||
```bash
|
||||
export WAREHOUSE_DATABASE_URL=postgresql://klo_reader:klo_reader@127.0.0.1:55432/analytics # pragma: allowlist secret
|
||||
pnpm --filter @klo/cli run build
|
||||
node packages/cli/dist/bin.js --project-dir /tmp/klo-postgres-historic setup \
|
||||
--new \
|
||||
--skip-agents \
|
||||
--skip-llm \
|
||||
--skip-embeddings \
|
||||
--skip-sources \
|
||||
--database postgres \
|
||||
--new-database-connection-id warehouse \
|
||||
--database-url env:WAREHOUSE_DATABASE_URL \
|
||||
--database-schema public \
|
||||
--enable-historic-sql \
|
||||
--historic-sql-min-calls 2 \
|
||||
--yes \
|
||||
--no-input
|
||||
```
|
||||
|
||||
### Readiness check
|
||||
|
||||
```bash
|
||||
pnpm run klo -- dev doctor --project-dir /tmp/klo-postgres-historic --no-input
|
||||
```
|
||||
|
||||
The installed CLI form is `klo dev doctor --project-dir
|
||||
/tmp/klo-postgres-historic --no-input`. Expected output includes `PASS Postgres
|
||||
Historic SQL (warehouse)` when `pg_stat_statements` is installed,
|
||||
`pg_read_all_stats` is granted, tracking is enabled, and
|
||||
`pg_stat_statements.max` is at least 5000.
|
||||
|
||||
Run local historic-SQL ingest:
|
||||
|
||||
```bash
|
||||
node packages/cli/dist/bin.js --project-dir /tmp/klo-postgres-historic dev ingest run \
|
||||
--connection-id warehouse \
|
||||
--adapter historic-sql \
|
||||
--plain \
|
||||
--no-input
|
||||
```
|
||||
|
||||
The full `dev ingest run` path also runs curation work units, so it requires a
|
||||
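configured LLM provider. The `setup` command above passes `--skip-llm`, so configure a provider first, or use `scripts/smoke.sh`, which exercises only the stage-only path.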
|
||||
|
||||
Inspect the latest manifest:
|
||||
|
||||
```bash
|
||||
find /tmp/klo-postgres-historic/raw-sources/warehouse/historic-sql -name manifest.json | sort | tail -n 1
|
||||
```
|
||||
|
||||
The manifest should have `dialect: "postgres"`, `degraded: true`,
|
||||
`baselineFirstRun: true` on the first run, and populated `pgServerVersion` and
|
||||
`statsResetAt`.
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
- Missing extension: confirm `shared_preload_libraries=pg_stat_statements` and
|
||||
`CREATE EXTENSION pg_stat_statements;` both happened in the `analytics`
|
||||
database.
|
||||
- Missing grants: confirm `GRANT pg_read_all_stats TO klo_reader;`.
|
||||
- Empty templates: rerun `scripts/generate-workload.sh base` and keep
|
||||
`--historic-sql-min-calls 2` for the smoke.
|
||||
- SQL-analysis failures: set `KLO_SQL_ANALYSIS_URL` to the running service URL
|
||||
or create `python-service/.venv` before running `scripts/smoke.sh`.
|
||||
24
examples/postgres-historic/docker-compose.yml
Normal file
24
examples/postgres-historic/docker-compose.yml
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
# Single Postgres 14 instance used by the historic-SQL smoke test.
services:
  postgres:
    image: postgres:14
    # pg_stat_statements has to be preloaded at server start; the extension is
    # then created in the database by the init script mounted under `volumes`.
    command:
      - postgres
      - -c
      - shared_preload_libraries=pg_stat_statements
      - -c
      - pg_stat_statements.track=top
      - -c
      - pg_stat_statements.max=10000
    environment:
      POSTGRES_DB: analytics
      POSTGRES_USER: postgres
      POSTGRES_PASSWORD: postgres # pragma: allowlist secret
    ports:
      # Publish on loopback only: this fixture uses well-known credentials and
      # every documented connection URL targets 127.0.0.1:55432.
      - "127.0.0.1:55432:5432"
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U postgres -d analytics"]
      interval: 2s
      timeout: 5s
      retries: 30
    volumes:
      - ./init:/docker-entrypoint-initdb.d:ro
|
||||
51
examples/postgres-historic/init/001-schema.sql
Normal file
51
examples/postgres-historic/init/001-schema.sql
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
-- Fixture schema, roles, and seed rows for the Postgres historic-SQL example.

-- Query statistics source read by the historic-SQL ingest.
CREATE EXTENSION IF NOT EXISTS pg_stat_statements;

-- app_user and etl_user issue the sample workload; klo_reader is the role
-- used by the example's connection URL.
CREATE ROLE app_user WITH LOGIN PASSWORD 'app_pass';
CREATE ROLE etl_user WITH LOGIN PASSWORD 'etl_pass';
CREATE ROLE klo_reader WITH LOGIN PASSWORD 'klo_reader';

GRANT pg_read_all_stats TO klo_reader;

CREATE TABLE customers (
  id     integer NOT NULL,
  region text    NOT NULL,
  plan   text    NOT NULL,
  PRIMARY KEY (id)
);

CREATE TABLE orders (
  id          integer        NOT NULL,
  customer_id integer        NOT NULL,
  status      text           NOT NULL,
  total       numeric(12, 2) NOT NULL,
  created_at  timestamptz    NOT NULL,
  PRIMARY KEY (id),
  FOREIGN KEY (customer_id) REFERENCES customers (id)
);

CREATE TABLE events (
  id          integer     NOT NULL,
  customer_id integer     NOT NULL,
  event_name  text        NOT NULL,
  occurred_at timestamptz NOT NULL,
  PRIMARY KEY (id),
  FOREIGN KEY (customer_id) REFERENCES customers (id)
);

INSERT INTO customers (id, region, plan)
VALUES
  (1, 'na',   'enterprise'),
  (2, 'na',   'team'),
  (3, 'eu',   'enterprise'),
  (4, 'apac', 'team');

-- Timestamps are relative to load time so the rows always fall inside the
-- 14-day window queried by the sample workload.
INSERT INTO orders (id, customer_id, status, total, created_at)
VALUES
  (1, 1, 'paid',     125.50, now() - interval '9 days'),
  (2, 1, 'paid',      89.00, now() - interval '4 days'),
  (3, 2, 'pending',   42.00, now() - interval '2 days'),
  (4, 3, 'paid',     301.25, now() - interval '1 day'),
  (5, 4, 'refunded',  77.70, now() - interval '3 hours');

INSERT INTO events (id, customer_id, event_name, occurred_at)
VALUES
  (1, 1, 'dashboard_viewed', now() - interval '1 day'),
  (2, 1, 'export_started',   now() - interval '8 hours'),
  (3, 2, 'dashboard_viewed', now() - interval '7 hours'),
  (4, 3, 'sync_completed',   now() - interval '6 hours'),
  (5, 4, 'dashboard_viewed', now() - interval '5 hours');

-- Read-only access for all three roles; must run after the tables exist.
GRANT USAGE ON SCHEMA public TO app_user, etl_user, klo_reader;
GRANT SELECT ON ALL TABLES IN SCHEMA public TO app_user, etl_user, klo_reader;
|
||||
33
examples/postgres-historic/scripts/generate-workload.sh
Executable file
33
examples/postgres-historic/scripts/generate-workload.sh
Executable file
|
|
@ -0,0 +1,33 @@
|
|||
#!/usr/bin/env bash
# Runs the sample query workload against the example Postgres container.
# Usage: generate-workload.sh [base|extra]   (defaults to "base")
set -o errexit -o nounset -o pipefail

# Resolve everything relative to this script so it can be run from any cwd.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
EXAMPLE_DIR="$(cd "${SCRIPT_DIR}/.." && pwd)"
COMPOSE_FILE="${EXAMPLE_DIR}/docker-compose.yml"

# Workload mode taken from the first positional argument.
MODE="${1:-base}"
|
||||
|
||||
#######################################
# Executes one SQL statement inside the compose "postgres" service via psql.
# Globals:   COMPOSE_FILE (read)
# Arguments: $1 - database role to connect as
#            $2 - that role's password (passed to psql as PGPASSWORD)
#            $3 - SQL text to run
# Outputs:   nothing on stdout (psql output is discarded)
# Returns:   exit status of the docker/psql invocation
#######################################
run_sql() {
  local db_user="$1" db_password="$2" statement="$3"

  # -T: no TTY, so this also works when not attached to a terminal.
  local -a compose_exec=(
    docker compose -f "$COMPOSE_FILE" exec -T -e "PGPASSWORD=$db_password" postgres
  )
  local -a psql_cmd=(
    psql -h 127.0.0.1 -U "$db_user" -d analytics -v ON_ERROR_STOP=1 -c "$statement"
  )

  "${compose_exec[@]}" "${psql_cmd[@]}" >/dev/null
}
|
||||
|
||||
# Reject unknown modes up front: a typo such as "extras" would otherwise run
# only the base workload without any indication that the extra one was skipped.
case "$MODE" in
  base | extra) ;;
  *)
    printf 'Unknown workload mode: %s (expected "base" or "extra")\n' "$MODE" >&2
    exit 2
    ;;
esac

#######################################
# Runs one statement repeatedly, each time through a separate run_sql call.
# Arguments: $1 - number of executions
#            $2 - database role
#            $3 - role password
#            $4 - SQL text
#######################################
repeat_sql() {
  local count="$1" user="$2" password="$3" sql="$4"
  local i
  for ((i = 0; i < count; i++)); do
    run_sql "$user" "$password" "$sql"
  done
}

# Base workload: two app_user queries and one etl_user query.
repeat_sql 12 app_user app_pass "SELECT c.region, count(*) AS order_count FROM orders o JOIN customers c ON c.id = o.customer_id WHERE o.status = 'paid' GROUP BY c.region ORDER BY c.region"

repeat_sql 7 app_user app_pass "SELECT c.plan, sum(o.total) AS revenue FROM orders o JOIN customers c ON c.id = o.customer_id WHERE o.created_at >= now() - interval '14 days' GROUP BY c.plan ORDER BY revenue DESC"

repeat_sql 5 etl_user etl_pass "SELECT e.event_name, count(*) AS event_count FROM events e JOIN customers c ON c.id = e.customer_id WHERE c.region = 'na' GROUP BY e.event_name ORDER BY event_count DESC"

# "extra" adds one more statement on top of the base workload.
if [[ "$MODE" == "extra" ]]; then
  repeat_sql 4 etl_user etl_pass "SELECT c.region, avg(o.total) AS avg_total FROM orders o JOIN customers c ON c.id = o.customer_id WHERE o.status <> 'refunded' GROUP BY c.region ORDER BY avg_total DESC"
fi
|
||||
152
examples/postgres-historic/scripts/smoke.sh
Executable file
152
examples/postgres-historic/scripts/smoke.sh
Executable file
|
|
@ -0,0 +1,152 @@
|
|||
#!/usr/bin/env bash
# End-to-end smoke for Postgres historic-SQL ingest (see ../README.md).
# Optional env: KLO_POSTGRES_HISTORIC_PROJECT_PARENT, KLO_SQL_ANALYSIS_URL,
#               KLO_POSTGRES_HISTORIC_KEEP_DOCKER, WAREHOUSE_DATABASE_URL.
set -o errexit -o nounset -o pipefail

# Locations derived from this script's own path.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
EXAMPLE_DIR="$(cd "${SCRIPT_DIR}/.." && pwd)"
KLO_ROOT="$(cd "${EXAMPLE_DIR}/../.." && pwd)"
# NOTE(review): REPO_ROOT is the parent of KLO_ROOT, and python-service/ is
# looked up there — confirm this matches the checkout layout.
REPO_ROOT="$(cd "${KLO_ROOT}/.." && pwd)"
COMPOSE_FILE="${EXAMPLE_DIR}/docker-compose.yml"

# The KLO project lives under a caller-provided parent, else a fresh temp dir.
if [[ -n "${KLO_POSTGRES_HISTORIC_PROJECT_PARENT:-}" ]]; then
  PROJECT_PARENT="${KLO_POSTGRES_HISTORIC_PROJECT_PARENT}"
else
  PROJECT_PARENT="$(mktemp -d)"
fi
PROJECT_DIR="${PROJECT_PARENT}/postgres-historic-klo"

KLO_BIN="${KLO_ROOT}/packages/cli/dist/bin.js"
PYTHON_SERVICE_LOG="${PROJECT_PARENT}/python-service.log"
# Set once the script starts its own SQL-analysis service; empty otherwise.
PYTHON_SERVICE_PID=""
|
||||
|
||||
#######################################
# EXIT handler: stops the SQL-analysis service this script started (if any)
# and tears down the compose stack, including its volumes.
# Globals: PYTHON_SERVICE_PID (read), COMPOSE_FILE (read),
#          KLO_POSTGRES_HISTORIC_KEEP_DOCKER (read; "1" keeps the stack up)
# Both steps are best-effort so a failed teardown never masks the real result.
#######################################
cleanup() {
  [[ -z "$PYTHON_SERVICE_PID" ]] \
    || kill "$PYTHON_SERVICE_PID" >/dev/null 2>&1 \
    || true

  case "${KLO_POSTGRES_HISTORIC_KEEP_DOCKER:-0}" in
    1) ;;
    *) docker compose -f "$COMPOSE_FILE" down -v >/dev/null 2>&1 || true ;;
  esac
}
trap cleanup EXIT
|
||||
|
||||
#######################################
# Ensures a SQL-analysis service is reachable via KLO_SQL_ANALYSIS_URL.
# If the variable is already set, nothing is started. Otherwise uvicorn is
# launched from python-service/.venv on 127.0.0.1:18081 and polled on /health
# for up to 60 attempts, one second apart.
# Globals:  KLO_SQL_ANALYSIS_URL (read; exported when the service is started),
#           REPO_ROOT (read), PYTHON_SERVICE_LOG (read),
#           PYTHON_SERVICE_PID (written)
# Outputs:  diagnostics on stderr; service output goes to PYTHON_SERVICE_LOG
# Returns:  0 when a service is available; exits 1 when the venv is missing or
#           the service dies or never becomes healthy.
#######################################
start_sql_analysis_if_needed() {
  # An externally managed service takes precedence.
  if [[ -n "${KLO_SQL_ANALYSIS_URL:-}" ]]; then
    return 0
  fi

  local service_dir="$REPO_ROOT/python-service"
  if [[ ! -d "$service_dir/.venv" ]]; then
    echo "Set KLO_SQL_ANALYSIS_URL or create python-service/.venv before running this smoke." >&2
    exit 1
  fi

  (
    cd "$service_dir"
    source .venv/bin/activate
    # exec replaces the subshell, so $! below is uvicorn's own PID. Without it
    # the recorded PID is the wrapping subshell, and killing that on exit can
    # leave uvicorn running.
    exec uvicorn app.main:app --host 127.0.0.1 --port 18081 >"$PYTHON_SERVICE_LOG" 2>&1
  ) &
  PYTHON_SERVICE_PID="$!"
  export KLO_SQL_ANALYSIS_URL="http://127.0.0.1:18081"

  local attempt
  for ((attempt = 0; attempt < 60; attempt++)); do
    if curl -fsS "$KLO_SQL_ANALYSIS_URL/health" >/dev/null 2>&1; then
      return 0
    fi
    # Stop polling as soon as the service process is gone.
    if ! kill -0 "$PYTHON_SERVICE_PID" 2>/dev/null; then
      echo "SQL analysis service exited before becoming healthy. Log: $PYTHON_SERVICE_LOG" >&2
      exit 1
    fi
    sleep 1
  done

  echo "SQL analysis service did not become healthy. Log: $PYTHON_SERVICE_LOG" >&2
  exit 1
}
|
||||
|
||||
#######################################
# Prints the manifest.json path that sorts last among all manifests staged
# under the project's historic-sql raw-sources directory.
# Globals:  PROJECT_DIR (read)
# Outputs:  one path on stdout, or nothing when no manifest exists
# Returns:  status of the find/sort pipeline
#######################################
latest_manifest() {
  local manifests status=0
  manifests="$(find "$PROJECT_DIR/raw-sources/warehouse/historic-sql" -name manifest.json | sort)" \
    || status=$?

  # Keep only the final line of the sorted listing.
  if [[ -n "$manifests" ]]; then
    printf '%s\n' "${manifests##*$'\n'}"
  fi
  return "$status"
}
|
||||
|
||||
#######################################
# Validates a staged historic-SQL manifest.
# Checks: dialect is "postgres", degraded is true, baselineFirstRun matches the
# expectation, pgServerVersion and statsResetAt are present, and templates is
# a non-empty array.
# Arguments: $1 - path to manifest.json
#            $2 - expected baselineFirstRun ("true"; any other value = false)
# Outputs:   failure reason on stderr
# Returns:   0 when every check passes, non-zero otherwise
#######################################
assert_manifest() {
  local manifest_path="$1"
  local expected_first_run="$2"

  # An empty path means no manifest was staged at all; report that directly
  # rather than letting Node fail with an ENOENT stack trace.
  if [[ -z "$manifest_path" || ! -f "$manifest_path" ]]; then
    echo "No historic-SQL manifest found (path: '${manifest_path}')" >&2
    return 1
  fi

  node - "$manifest_path" "$expected_first_run" <<'NODE'
const { readFileSync } = require('node:fs');
const manifestPath = process.argv[2];
const expectedFirstRun = process.argv[3] === 'true';
const manifest = JSON.parse(readFileSync(manifestPath, 'utf8'));
if (manifest.dialect !== 'postgres') throw new Error(`Expected dialect postgres, got ${manifest.dialect}`);
if (manifest.degraded !== true) throw new Error('Expected degraded:true for Postgres PGSS v1');
if (manifest.baselineFirstRun !== expectedFirstRun) {
  throw new Error(`Expected baselineFirstRun:${expectedFirstRun}, got ${manifest.baselineFirstRun}`);
}
if (!manifest.pgServerVersion) throw new Error('Expected pgServerVersion');
if (!manifest.statsResetAt) throw new Error('Expected statsResetAt');
if (!Array.isArray(manifest.templates) || manifest.templates.length === 0) {
  throw new Error('Expected at least one staged historic-SQL template');
}
NODE
}
|
||||
|
||||
#######################################
# Runs one stage-only historic-SQL ingest for connection "warehouse" through
# the built KLO packages, then calls the adapter's onPullSucceeded hook.
# Globals:   KLO_ROOT (read), PROJECT_DIR (read)
# Arguments: $1 - job id; also names the staged directory under
#                 .klo/cache/local-ingest/<job id>/staged
# Outputs:   the resulting sync id on stdout
# Returns:   exit status of the Node script
#######################################
run_historic_stage_only() {
  local job_id="$1"
  # The script uses top-level await, which is only valid in an ES module.
  # State that explicitly so it does not depend on the Node version's
  # automatic syntax detection for stdin input.
  node --input-type=module - "$KLO_ROOT" "$PROJECT_DIR" "$job_id" <<'NODE'
const { join } = await import('node:path');

const kloRoot = process.argv[2];
const projectDir = process.argv[3];
const jobId = process.argv[4];
const { loadKloProject } = await import(join(kloRoot, 'packages/context/dist/project/index.js'));
const { runLocalStageOnlyIngest } = await import(join(kloRoot, 'packages/context/dist/ingest/index.js'));
const { createKloCliLocalIngestAdapters } = await import(join(kloRoot, 'packages/cli/dist/local-adapters.js'));

const project = await loadKloProject({ projectDir });
const adapters = createKloCliLocalIngestAdapters(project, { historicSqlConnectionId: 'warehouse' });
const adapter = adapters.find((candidate) => candidate.source === 'historic-sql');
if (!adapter) throw new Error('historic-sql adapter was not registered for local run');
const record = await runLocalStageOnlyIngest({
  project,
  adapters,
  adapter: 'historic-sql',
  connectionId: 'warehouse',
  trigger: 'manual_resync',
  jobId,
});
// NOTE(review): presumably this hook is what persists the PGSS baseline used
// by the next run's delta computation — confirm against the adapter.
await adapter.onPullSucceeded?.({
  connectionId: 'warehouse',
  sourceKey: 'historic-sql',
  syncId: record.syncId,
  trigger: 'manual_resync',
  completedAt: new Date(record.completedAt),
  stagedDir: join(project.projectDir, '.klo/cache/local-ingest', jobId, 'staged'),
});
console.log(record.syncId);
NODE
}
|
||||
|
||||
cd "$KLO_ROOT"

# Build the two packages whose dist/ output the Node snippets import.
for package in @klo/context @klo/cli; do
  pnpm --filter "$package" run build
done
start_sql_analysis_if_needed

# Bring up Postgres and seed pg_stat_statements with the base workload.
docker compose -f "$COMPOSE_FILE" up -d --wait
"$EXAMPLE_DIR/scripts/generate-workload.sh" base

export WAREHOUSE_DATABASE_URL="${WAREHOUSE_DATABASE_URL:-postgresql://klo_reader:klo_reader@127.0.0.1:55432/analytics}" # pragma: allowlist secret

setup_args=(
  --new
  --skip-agents
  --skip-llm
  --skip-embeddings
  --skip-sources
  --database postgres
  --new-database-connection-id warehouse
  --database-url env:WAREHOUSE_DATABASE_URL
  --database-schema public
  --enable-historic-sql
  --historic-sql-min-calls 2
  --yes
  --no-input
)
node "$KLO_BIN" --project-dir "$PROJECT_DIR" setup "${setup_args[@]}"

#######################################
# Runs one stage-only ingest and validates the newest manifest.
# Arguments: $1 - label embedded in the job id (historic-<label>-<pid>)
#            $2 - expected baselineFirstRun ("true" or "false")
#######################################
stage_and_check() {
  local label="$1" expect_baseline="$2" manifest
  run_historic_stage_only "historic-${label}-$$"
  manifest="$(latest_manifest)"
  assert_manifest "$manifest" "$expect_baseline"
}

# Run 1: nothing ingested before, so this must be a fresh baseline.
stage_and_check first true

# Run 2: more workload on top of the baseline, so not a first run.
"$EXAMPLE_DIR/scripts/generate-workload.sh" extra
stage_and_check second false

# Run 3: after a statistics reset the ingest must report a baseline again.
docker compose -f "$COMPOSE_FILE" exec -T postgres \
  psql -U postgres -d analytics -v ON_ERROR_STOP=1 -c "SELECT pg_stat_statements_reset();" >/dev/null
"$EXAMPLE_DIR/scripts/generate-workload.sh" extra
stage_and_check reset true

echo "Postgres historic SQL smoke passed"
echo "Project dir: $PROJECT_DIR"
|
||||
Loading…
Add table
Add a link
Reference in a new issue