mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-10 08:05:14 +02:00
Initial open-source release
This commit is contained in:
commit
1a42152e6f
1199 changed files with 257054 additions and 0 deletions
33
examples/postgres-historic/scripts/generate-workload.sh
Executable file
33
examples/postgres-historic/scripts/generate-workload.sh
Executable file
|
|
@ -0,0 +1,33 @@
|
|||
#!/usr/bin/env bash
# Replays a fixed set of analytics queries against the example Postgres
# service defined in this example's docker-compose.yml.
#
# Usage: generate-workload.sh [MODE]
#   MODE defaults to "base"; the value "extra" enables one additional batch.
set -o errexit -o nounset -o pipefail

# Resolve every path from this script's own location so the caller's working
# directory does not matter.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
EXAMPLE_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
COMPOSE_FILE="$EXAMPLE_DIR/docker-compose.yml"

# A missing or empty first argument selects the "base" workload.
MODE="base"
if [[ -n "${1:-}" ]]; then
  MODE="$1"
fi
|
||||
|
||||
run_sql() {
  # Executes one SQL statement with psql inside the compose "postgres" service.
  #   $1 - database user to connect as
  #   $2 - that user's password, handed to psql through PGPASSWORD
  #   $3 - SQL text to run
  # psql's stdout is discarded; ON_ERROR_STOP=1 is passed so that a failing
  # statement makes psql (and therefore this function) return non-zero.
  local db_user="$1" db_password="$2" statement="$3"
  local -a psql_cmd=(
    psql -h 127.0.0.1 -U "$db_user" -d analytics -v ON_ERROR_STOP=1 -c "$statement"
  )

  docker compose -f "$COMPOSE_FILE" exec -T -e PGPASSWORD="$db_password" postgres \
    "${psql_cmd[@]}" >/dev/null
}
|
||||
|
||||
# Runs the same statement a fixed number of times through run_sql.
#   $1 - repetition count
#   $2 - database user
#   $3 - database password
#   $4 - SQL text
repeat_sql() {
  local times="$1" user="$2" password="$3" sql="$4"
  local i
  for ((i = 0; i < times; i++)); do
    run_sql "$user" "$password" "$sql"
  done
}

# Paid orders per region, issued 12 times as app_user.
repeat_sql 12 app_user app_pass "SELECT c.region, count(*) AS order_count FROM orders o JOIN customers c ON c.id = o.customer_id WHERE o.status = 'paid' GROUP BY c.region ORDER BY c.region"

# Revenue per plan over the last 14 days, issued 7 times as app_user.
repeat_sql 7 app_user app_pass "SELECT c.plan, sum(o.total) AS revenue FROM orders o JOIN customers c ON c.id = o.customer_id WHERE o.created_at >= now() - interval '14 days' GROUP BY c.plan ORDER BY revenue DESC"

# Event counts for customers in region 'na', issued 5 times as etl_user.
repeat_sql 5 etl_user etl_pass "SELECT e.event_name, count(*) AS event_count FROM events e JOIN customers c ON c.id = e.customer_id WHERE c.region = 'na' GROUP BY e.event_name ORDER BY event_count DESC"

# Only the "extra" mode adds the average-order-total query (4 runs as etl_user).
if [[ "$MODE" == "extra" ]]; then
  repeat_sql 4 etl_user etl_pass "SELECT c.region, avg(o.total) AS avg_total FROM orders o JOIN customers c ON c.id = o.customer_id WHERE o.status <> 'refunded' GROUP BY c.region ORDER BY avg_total DESC"
fi
|
||||
152
examples/postgres-historic/scripts/smoke.sh
Executable file
152
examples/postgres-historic/scripts/smoke.sh
Executable file
|
|
@ -0,0 +1,152 @@
|
|||
#!/usr/bin/env bash
# Smoke test for the Postgres historic-SQL example: builds the CLI, starts the
# example database, generates query workload, and checks the staged manifests.
set -o errexit -o nounset -o pipefail

# Directory layout, all derived from this script's location.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
EXAMPLE_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
KLO_ROOT="$(cd "$EXAMPLE_DIR/../.." && pwd)"
REPO_ROOT="$(cd "$KLO_ROOT/.." && pwd)"
COMPOSE_FILE="$EXAMPLE_DIR/docker-compose.yml"
KLO_BIN="$KLO_ROOT/packages/cli/dist/bin.js"

# The project is created under KLO_POSTGRES_HISTORIC_PROJECT_PARENT when that
# variable is set and non-empty; otherwise under a fresh temporary directory.
if [[ -n "${KLO_POSTGRES_HISTORIC_PROJECT_PARENT:-}" ]]; then
  PROJECT_PARENT="$KLO_POSTGRES_HISTORIC_PROJECT_PARENT"
else
  PROJECT_PARENT="$(mktemp -d)"
fi
PROJECT_DIR="$PROJECT_PARENT/postgres-historic-klo"
PYTHON_SERVICE_LOG="$PROJECT_PARENT/python-service.log"

# Stays empty unless this script starts the SQL analysis service itself.
PYTHON_SERVICE_PID=""
|
||||
|
||||
cleanup() {
  # EXIT handler. Both steps are best-effort: their output is silenced and
  # their failures are ignored.

  # Signal the SQL analysis service only if this script launched one.
  if [[ "$PYTHON_SERVICE_PID" != "" ]]; then
    kill "$PYTHON_SERVICE_PID" &>/dev/null || true
  fi

  # KLO_POSTGRES_HISTORIC_KEEP_DOCKER=1 leaves the compose stack running.
  local keep_docker="${KLO_POSTGRES_HISTORIC_KEEP_DOCKER:-0}"
  if [[ "$keep_docker" == "1" ]]; then
    return
  fi
  docker compose -f "$COMPOSE_FILE" down -v &>/dev/null || true
}

trap cleanup EXIT
|
||||
|
||||
start_sql_analysis_if_needed() {
  # Ensures KLO_SQL_ANALYSIS_URL points at a SQL analysis service.
  #
  # - If KLO_SQL_ANALYSIS_URL is already set and non-empty, nothing is started.
  # - Otherwise uvicorn is launched from $REPO_ROOT/python-service using its
  #   .venv, listening on 127.0.0.1:18081 with output in $PYTHON_SERVICE_LOG,
  #   and KLO_SQL_ANALYSIS_URL is exported to that address.
  #
  # Sets PYTHON_SERVICE_PID to the launched process. Exits the script with
  # status 1 when the virtualenv is missing or /health never answers.
  if [[ -n "${KLO_SQL_ANALYSIS_URL:-}" ]]; then
    return
  fi

  if [[ ! -d "$REPO_ROOT/python-service/.venv" ]]; then
    echo "Set KLO_SQL_ANALYSIS_URL or create python-service/.venv before running this smoke." >&2
    exit 1
  fi

  (
    cd "$REPO_ROOT/python-service"
    source .venv/bin/activate
    # exec replaces the subshell with uvicorn, so the PID captured from $!
    # below is uvicorn itself. Without it, a later kill on that PID can stop
    # only the wrapper subshell and leave the server holding the port.
    exec uvicorn app.main:app --host 127.0.0.1 --port 18081 >"$PYTHON_SERVICE_LOG" 2>&1
  ) &
  PYTHON_SERVICE_PID="$!"
  export KLO_SQL_ANALYSIS_URL="http://127.0.0.1:18081"

  # Poll the health endpoint for up to roughly 60 seconds.
  for _ in $(seq 1 60); do
    if curl -fsS "$KLO_SQL_ANALYSIS_URL/health" >/dev/null 2>&1; then
      return
    fi
    # No point waiting out the full timeout if the service process is gone
    # (for example because it crashed on startup).
    if ! kill -0 "$PYTHON_SERVICE_PID" >/dev/null 2>&1; then
      break
    fi
    sleep 1
  done

  echo "SQL analysis service did not become healthy. Log: $PYTHON_SERVICE_LOG" >&2
  exit 1
}
|
||||
|
||||
latest_manifest() {
  # Prints the path of the manifest.json that sorts last among all manifests
  # found under the project's historic-sql staging directory.
  # NOTE(review): "last" is by path sort order; this presumably matches the
  # newest sync only if sync directory names sort chronologically - confirm.
  local staging_root="$PROJECT_DIR/raw-sources/warehouse/historic-sql"

  find "$staging_root" -name manifest.json \
    | sort \
    | tail -n 1
}
|
||||
|
||||
assert_manifest() {
  # Validates a staged historic-SQL manifest; returns non-zero (node throws)
  # on the first failed expectation.
  #   $1 - path to manifest.json
  #   $2 - "true" when the manifest must report baselineFirstRun:true;
  #        any other value means it must report false
  local manifest_file="$1" want_first_run="$2"

  node - "$manifest_file" "$want_first_run" <<'NODE'
const fs = require('node:fs');

const [manifestPath, firstRunFlag] = process.argv.slice(2);
const expectedFirstRun = firstRunFlag === 'true';
const manifest = JSON.parse(fs.readFileSync(manifestPath, 'utf8'));

// Throws an Error carrying `message` unless `condition` holds.
const ensure = (condition, message) => {
  if (!condition) throw new Error(message);
};

ensure(manifest.dialect === 'postgres', `Expected dialect postgres, got ${manifest.dialect}`);
ensure(manifest.degraded === true, 'Expected degraded:true for Postgres PGSS v1');
ensure(
  manifest.baselineFirstRun === expectedFirstRun,
  `Expected baselineFirstRun:${expectedFirstRun}, got ${manifest.baselineFirstRun}`,
);
ensure(Boolean(manifest.pgServerVersion), 'Expected pgServerVersion');
ensure(Boolean(manifest.statsResetAt), 'Expected statsResetAt');
ensure(
  Array.isArray(manifest.templates) && manifest.templates.length !== 0,
  'Expected at least one staged historic-SQL template',
);
NODE
}
|
||||
|
||||
run_historic_stage_only() {
  # Runs a stage-only ingest of the "historic-sql" source for connection
  # "warehouse" against $PROJECT_DIR, using the built packages under $KLO_ROOT,
  # then calls the adapter's onPullSucceeded hook when it defines one.
  #   $1 - job id; also names the staged directory under .klo/cache/local-ingest
  # Prints the resulting sync id on stdout.
  local job_id="$1"

  # The program below uses top-level await, which is only valid in an ES
  # module. --input-type=module makes that explicit instead of depending on
  # the Node version's syntax auto-detection for stdin input.
  node --input-type=module - "$KLO_ROOT" "$PROJECT_DIR" "$job_id" <<'NODE'
const { join } = await import('node:path');

const kloRoot = process.argv[2];
const projectDir = process.argv[3];
const jobId = process.argv[4];

// Load the compiled packages straight from their dist folders.
const { loadKloProject } = await import(join(kloRoot, 'packages/context/dist/project/index.js'));
const { runLocalStageOnlyIngest } = await import(join(kloRoot, 'packages/context/dist/ingest/index.js'));
const { createKloCliLocalIngestAdapters } = await import(join(kloRoot, 'packages/cli/dist/local-adapters.js'));

const project = await loadKloProject({ projectDir });
const adapters = createKloCliLocalIngestAdapters(project, { historicSqlConnectionId: 'warehouse' });
const adapter = adapters.find((candidate) => candidate.source === 'historic-sql');
if (!adapter) throw new Error('historic-sql adapter was not registered for local run');

const record = await runLocalStageOnlyIngest({
  project,
  adapters,
  adapter: 'historic-sql',
  connectionId: 'warehouse',
  trigger: 'manual_resync',
  jobId,
});

// The hook is optional on the adapter; it is invoked explicitly here after
// the stage-only run has returned its record.
await adapter.onPullSucceeded?.({
  connectionId: 'warehouse',
  sourceKey: 'historic-sql',
  syncId: record.syncId,
  trigger: 'manual_resync',
  completedAt: new Date(record.completedAt),
  stagedDir: join(project.projectDir, '.klo/cache/local-ingest', jobId, 'staged'),
});

console.log(record.syncId);
NODE
}
|
||||
|
||||
# --- Build the packages this smoke test executes from dist/ -----------------
cd "$KLO_ROOT"
for workspace_pkg in @klo/context @klo/cli; do
  pnpm --filter "$workspace_pkg" run build
done
start_sql_analysis_if_needed

# --- Bring up the example database and seed the base workload ---------------
docker compose -f "$COMPOSE_FILE" up -d --wait
"$EXAMPLE_DIR/scripts/generate-workload.sh" base

# --- Create a fresh project wired to the example warehouse ------------------
# A caller-supplied WAREHOUSE_DATABASE_URL wins; otherwise use the example default.
: "${WAREHOUSE_DATABASE_URL:=postgresql://klo_reader:klo_reader@127.0.0.1:55432/analytics}" # pragma: allowlist secret
export WAREHOUSE_DATABASE_URL

setup_args=(
  --new
  --skip-agents
  --skip-llm
  --skip-embeddings
  --skip-sources
  --database postgres
  --new-database-connection-id warehouse
  --database-url env:WAREHOUSE_DATABASE_URL
  --database-schema public
  --enable-historic-sql
  --historic-sql-min-calls 2
  --yes
  --no-input
)
node "$KLO_BIN" --project-dir "$PROJECT_DIR" setup "${setup_args[@]}"

# --- Run 1: the manifest must report baselineFirstRun:true ------------------
run_historic_stage_only "historic-first-$$"
FIRST_MANIFEST="$(latest_manifest)"
assert_manifest "$FIRST_MANIFEST" true

# --- Run 2: after extra workload, the manifest must report false ------------
"$EXAMPLE_DIR/scripts/generate-workload.sh" extra
run_historic_stage_only "historic-second-$$"
SECOND_MANIFEST="$(latest_manifest)"
assert_manifest "$SECOND_MANIFEST" false

# --- Run 3: after resetting pg_stat_statements, true is expected again ------
docker compose -f "$COMPOSE_FILE" exec -T postgres \
  psql -U postgres -d analytics -v ON_ERROR_STOP=1 -c "SELECT pg_stat_statements_reset();" >/dev/null
"$EXAMPLE_DIR/scripts/generate-workload.sh" extra
run_historic_stage_only "historic-reset-$$"
RESET_MANIFEST="$(latest_manifest)"
assert_manifest "$RESET_MANIFEST" true

printf '%s\n' "Postgres historic SQL smoke passed"
printf '%s\n' "Project dir: $PROJECT_DIR"
|
||||
Loading…
Add table
Add a link
Reference in a new issue